From 94afb714ed22185e9dc7292923ad21fc9952cebe Mon Sep 17 00:00:00 2001 From: lidezhu <47731263+lidezhu@users.noreply.github.com> Date: Mon, 23 May 2022 18:17:53 +0800 Subject: [PATCH 1/7] flush cache before segment merge (#4955) * flush cache before segment merge * keep flush until success * check whether segment is valid if flush failed * Update dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp Co-authored-by: JaySon * add more fix * check flush result in segment::write Co-authored-by: JaySon --- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 23 +++++++++++ dbms/src/Storages/DeltaMerge/Segment.cpp | 38 ++++++++++++++++++- dbms/src/Storages/DeltaMerge/Segment.h | 3 +- .../DeltaMerge/tests/gtest_dm_segment.cpp | 28 +++++++++----- 4 files changed, 80 insertions(+), 12 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 997db601d1e..27de092c26a 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -1971,6 +1971,29 @@ void DeltaMergeStore::segmentMerge(DMContext & dm_context, const SegmentPtr & le right->info(), dm_context.min_version); + /// This segment may contain some rows that not belong to this segment range which is left by previous split operation. + /// And only saved data in this segment will be filtered by the segment range in the merge process, + /// unsaved data will be directly copied to the new segment. + /// So we flush here to make sure that all potential data left by previous split operation is saved. + while (!left->flushCache(dm_context)) + { + // keep flush until success if not abandoned + if (left->hasAbandoned()) + { + LOG_FMT_DEBUG(log, "Give up merge segments left [{}], right [{}]", left->segmentId(), right->segmentId()); + return; + } + } + while (!right->flushCache(dm_context)) + { + // keep flush until success if not abandoned + if (right->hasAbandoned()) + { + LOG_FMT_DEBUG(log, "Give up merge segments left [{}], right [{}]", left->segmentId(), right->segmentId()); + return; + } + } + SegmentSnapshotPtr left_snap; SegmentSnapshotPtr right_snap; ColumnDefinesPtr schema_snap; diff --git a/dbms/src/Storages/DeltaMerge/Segment.cpp b/dbms/src/Storages/DeltaMerge/Segment.cpp index 9e195b2b25d..8398fdcee40 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.cpp +++ b/dbms/src/Storages/DeltaMerge/Segment.cpp @@ -306,7 +306,7 @@ bool Segment::writeToCache(DMContext & dm_context, const Block & block, size_t o return delta->appendToCache(dm_context, block, offset, limit); } -bool Segment::write(DMContext & dm_context, const Block & block) +bool Segment::write(DMContext & dm_context, const Block & block, bool flush_cache) { LOG_FMT_TRACE(log, "Segment [{}] write to disk rows: {}", segment_id, block.rows()); WriteBatches wbs(dm_context.storage_pool, dm_context.getWriteLimiter()); @@ -316,7 +316,14 @@ bool Segment::write(DMContext & dm_context, const Block & block) if (delta->appendColumnFile(dm_context, column_file)) { - flushCache(dm_context); + if (flush_cache) + { + while (!flushCache(dm_context)) + { + if (hasAbandoned()) + return false; + } + } return true; } else @@ -1129,6 +1136,29 @@ SegmentPair Segment::applySplit(DMContext & dm_context, // SegmentPtr Segment::merge(DMContext & dm_context, const ColumnDefinesPtr & schema_snap, const SegmentPtr & left, const SegmentPtr & right) { WriteBatches wbs(dm_context.storage_pool, dm_context.getWriteLimiter()); + /// This segment may contain some rows that not belong to this segment range which is left by previous split operation. + /// And only saved data in this segment will be filtered by the segment range in the merge process, + /// unsaved data will be directly copied to the new segment. + /// So we flush here to make sure that all potential data left by previous split operation is saved. + while (!left->flushCache(dm_context)) + { + // keep flush until success if not abandoned + if (left->hasAbandoned()) + { + LOG_FMT_DEBUG(left->log, "Give up merge segments left [{}], right [{}]", left->segmentId(), right->segmentId()); + return {}; + } + } + while (!right->flushCache(dm_context)) + { + // keep flush until success if not abandoned + if (right->hasAbandoned()) + { + LOG_FMT_DEBUG(right->log, "Give up merge segments left [{}], right [{}]", left->segmentId(), right->segmentId()); + return {}; + } + } + auto left_snap = left->createSnapshot(dm_context, true, CurrentMetrics::DT_SnapshotOfSegmentMerge); auto right_snap = right->createSnapshot(dm_context, true, CurrentMetrics::DT_SnapshotOfSegmentMerge); @@ -1149,6 +1179,10 @@ SegmentPtr Segment::merge(DMContext & dm_context, const ColumnDefinesPtr & schem return merged; } +/// Segments may contain some rows that not belong to its range which is left by previous split operation. +/// And only saved data in the segment will be filtered by the segment range in the merge process, +/// unsaved data will be directly copied to the new segment. +/// So remember to do a flush for the segments before merge. StableValueSpacePtr Segment::prepareMerge(DMContext & dm_context, // const ColumnDefinesPtr & schema_snap, const SegmentPtr & left, diff --git a/dbms/src/Storages/DeltaMerge/Segment.h b/dbms/src/Storages/DeltaMerge/Segment.h index 3ad29ee14a5..cccfc5091b9 100644 --- a/dbms/src/Storages/DeltaMerge/Segment.h +++ b/dbms/src/Storages/DeltaMerge/Segment.h @@ -144,7 +144,8 @@ class Segment : private boost::noncopyable bool writeToCache(DMContext & dm_context, const Block & block, size_t offset, size_t limit); /// For test only. - bool write(DMContext & dm_context, const Block & block); + bool write(DMContext & dm_context, const Block & block, bool flush_cache = true); + bool write(DMContext & dm_context, const RowKeyRange & delete_range); bool ingestColumnFiles(DMContext & dm_context, const RowKeyRange & range, const ColumnFiles & column_files, bool clear_data_in_range); diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp index 6bf33465366..5726cfa132d 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_segment.cpp @@ -949,11 +949,17 @@ CATCH TEST_F(Segment_test, Split) try { - const size_t num_rows_write = 100; + const size_t num_rows_write_per_batch = 100; + const size_t num_rows_write = num_rows_write_per_batch * 2; { - // write to segment - Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); - segment->write(dmContext(), std::move(block)); + // write to segment and flush + Block block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write_per_batch, false); + segment->write(dmContext(), std::move(block), true); + } + { + // write to segment and don't flush + Block block = DMTestEnv::prepareSimpleWriteBlock(num_rows_write_per_batch, 2 * num_rows_write_per_batch, false); + segment->write(dmContext(), std::move(block), false); } { @@ -989,7 +995,7 @@ try size_t num_rows_seg2 = 0; { { - auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); + auto in = segment->getInputStream(dmContext(), *tableColumns(), {segment->getRowKeyRange()}); in->readPrefix(); while (Block block = in->read()) { @@ -998,7 +1004,7 @@ try in->readSuffix(); } { - auto in = segment->getInputStream(dmContext(), *tableColumns(), {RowKeyRange::newAll(false, 1)}); + auto in = new_segment->getInputStream(dmContext(), *tableColumns(), {new_segment->getRowKeyRange()}); in->readPrefix(); while (Block block = in->read()) { @@ -1009,9 +1015,13 @@ try ASSERT_EQ(num_rows_seg1 + num_rows_seg2, num_rows_write); } + // delete rows in the right segment + { + new_segment->write(dmContext(), /*delete_range*/ new_segment->getRowKeyRange()); + new_segment->flushCache(dmContext()); + } + // merge segments - // TODO: enable merge test! - if (false) { segment = Segment::merge(dmContext(), tableColumns(), segment, new_segment); { @@ -1030,7 +1040,7 @@ try num_rows_read += block.rows(); } in->readSuffix(); - EXPECT_EQ(num_rows_read, num_rows_write); + EXPECT_EQ(num_rows_read, num_rows_seg1); } } } From 1dfe7fc9eba8091348314a52f495a8206ce4a02c Mon Sep 17 00:00:00 2001 From: jiaqizho Date: Mon, 23 May 2022 19:08:46 +0800 Subject: [PATCH 2/7] Add a global max_id to fix reuse page_id problem (#4948) close pingcap/tiflash#4939 --- .../Storages/DeltaMerge/DeltaMergeStore.cpp | 13 +- dbms/src/Storages/DeltaMerge/StoragePool.cpp | 24 ++-- dbms/src/Storages/DeltaMerge/StoragePool.h | 11 +- dbms/src/Storages/Page/PageStorage.h | 12 +- dbms/src/Storages/Page/V2/PageStorage.cpp | 6 +- dbms/src/Storages/Page/V2/PageStorage.h | 2 +- dbms/src/Storages/Page/V3/PageDirectory.cpp | 49 ++------ dbms/src/Storages/Page/V3/PageDirectory.h | 3 +- .../Storages/Page/V3/PageDirectoryFactory.cpp | 4 +- .../Storages/Page/V3/PageDirectoryFactory.h | 1 - dbms/src/Storages/Page/V3/PageStorageImpl.cpp | 4 +- dbms/src/Storages/Page/V3/PageStorageImpl.h | 2 +- dbms/src/Storages/Page/V3/WAL/serialize.cpp | 4 +- .../Page/V3/tests/gtest_page_directory.cpp | 111 ------------------ .../Page/V3/tests/gtest_page_storage.cpp | 51 ++++++++ 15 files changed, 115 insertions(+), 182 deletions(-) diff --git a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp index 27de092c26a..a74404f3dbb 100644 --- a/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp +++ b/dbms/src/Storages/DeltaMerge/DeltaMergeStore.cpp @@ -244,10 +244,19 @@ DeltaMergeStore::DeltaMergeStore(Context & db_context, if (const auto first_segment_entry = storage_pool->metaReader()->getPageEntry(DELTA_MERGE_FIRST_SEGMENT_ID); !first_segment_entry.isValid()) { - // Create the first segment. auto segment_id = storage_pool->newMetaPageId(); if (segment_id != DELTA_MERGE_FIRST_SEGMENT_ID) - throw Exception(fmt::format("The first segment id should be {}", DELTA_MERGE_FIRST_SEGMENT_ID), ErrorCodes::LOGICAL_ERROR); + { + if (page_storage_run_mode == PageStorageRunMode::ONLY_V2) + { + throw Exception(fmt::format("The first segment id should be {}", DELTA_MERGE_FIRST_SEGMENT_ID), ErrorCodes::LOGICAL_ERROR); + } + + // In ONLY_V3 or MIX_MODE, If create a new DeltaMergeStore + // Should used fixed DELTA_MERGE_FIRST_SEGMENT_ID to create first segment + segment_id = DELTA_MERGE_FIRST_SEGMENT_ID; + } + auto first_segment = Segment::newSegment(*dm_context, store_columns, RowKeyRange::newAll(is_common_handle, rowkey_column_size), segment_id, 0); segments.emplace(first_segment->getRowKeyRange().getEnd(), first_segment); diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index fbb631064a7..a040c5b6c6a 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -378,18 +378,18 @@ PageStorageRunMode StoragePool::restore() data_storage_v2->restore(); meta_storage_v2->restore(); - max_log_page_id = log_storage_v2->getMaxId(ns_id); - max_data_page_id = data_storage_v2->getMaxId(ns_id); - max_meta_page_id = meta_storage_v2->getMaxId(ns_id); + max_log_page_id = log_storage_v2->getMaxId(); + max_data_page_id = data_storage_v2->getMaxId(); + max_meta_page_id = meta_storage_v2->getMaxId(); storage_pool_metrics = CurrentMetrics::Increment{CurrentMetrics::StoragePoolV2Only}; break; } case PageStorageRunMode::ONLY_V3: { - max_log_page_id = log_storage_v3->getMaxId(ns_id); - max_data_page_id = data_storage_v3->getMaxId(ns_id); - max_meta_page_id = meta_storage_v3->getMaxId(ns_id); + max_log_page_id = log_storage_v3->getMaxId(); + max_data_page_id = data_storage_v3->getMaxId(); + max_meta_page_id = meta_storage_v3->getMaxId(); storage_pool_metrics = CurrentMetrics::Increment{CurrentMetrics::StoragePoolV3Only}; break; @@ -456,18 +456,18 @@ PageStorageRunMode StoragePool::restore() data_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, /*storage_v2_*/ nullptr, data_storage_v3); meta_storage_writer = std::make_shared(PageStorageRunMode::ONLY_V3, /*storage_v2_*/ nullptr, meta_storage_v3); - max_log_page_id = log_storage_v3->getMaxId(ns_id); - max_data_page_id = data_storage_v3->getMaxId(ns_id); - max_meta_page_id = meta_storage_v3->getMaxId(ns_id); + max_log_page_id = log_storage_v3->getMaxId(); + max_data_page_id = data_storage_v3->getMaxId(); + max_meta_page_id = meta_storage_v3->getMaxId(); run_mode = PageStorageRunMode::ONLY_V3; storage_pool_metrics = CurrentMetrics::Increment{CurrentMetrics::StoragePoolV3Only}; } else // Still running Mix Mode { - max_log_page_id = std::max(log_storage_v2->getMaxId(ns_id), log_storage_v3->getMaxId(ns_id)); - max_data_page_id = std::max(data_storage_v2->getMaxId(ns_id), data_storage_v3->getMaxId(ns_id)); - max_meta_page_id = std::max(meta_storage_v2->getMaxId(ns_id), meta_storage_v3->getMaxId(ns_id)); + max_log_page_id = std::max(log_storage_v2->getMaxId(), log_storage_v3->getMaxId()); + max_data_page_id = std::max(data_storage_v2->getMaxId(), data_storage_v3->getMaxId()); + max_meta_page_id = std::max(meta_storage_v2->getMaxId(), meta_storage_v3->getMaxId()); storage_pool_metrics = CurrentMetrics::Increment{CurrentMetrics::StoragePoolMixMode}; } break; diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h index 7ba6dd85995..d05454a5431 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.h +++ b/dbms/src/Storages/DeltaMerge/StoragePool.h @@ -149,11 +149,18 @@ class StoragePool : private boost::noncopyable // Caller must cancel gc tasks before drop void drop(); - PageId newDataPageIdForDTFile(StableDiskDelegator & delegator, const char * who); + // For function `newLogPageId`,`newMetaPageId`,`newDataPageIdForDTFile`: + // For PageStorageRunMode::ONLY_V2, every table have its own three PageStorage (meta/data/log). + // So these functions return the Page id starts from 1 and is continuously incremented. + // For PageStorageRunMode::ONLY_V3/MIX_MODE, PageStorage is global(distinguish by ns_id for different table). + // In order to avoid Page id from being reused (and cause troubles while restoring WAL from disk), + // StoragePool will assign the max_log_page_id/max_meta_page_id/max_data_page_id by the global max id + // regardless of ns_id while being restored. This causes the ids in a table to not be continuously incremented. - PageId maxMetaPageId() { return max_meta_page_id; } + PageId newDataPageIdForDTFile(StableDiskDelegator & delegator, const char * who); PageId newLogPageId() { return ++max_log_page_id; } PageId newMetaPageId() { return ++max_meta_page_id; } + #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 481888bdf33..479c368a585 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -233,7 +233,17 @@ class PageStorage : private boost::noncopyable virtual void drop() = 0; - virtual PageId getMaxId(NamespaceId ns_id) = 0; + // Get the max id from PageStorage. + // + // For V2, every table have its own three PageStorage (meta/data/log). + // So this function return the Page id starts from 0 and is continuously incremented to + // new pages. + // For V3, PageStorage is global(distinguish by ns_id for different table). + // In order to avoid Page id from being reused (and cause troubles while restoring WAL from disk), + // this function returns the global max id regardless of ns_id. This causes the ids in a table + // to not be continuously incremented. + // Note that Page id 1 in each ns_id is special. + virtual PageId getMaxId() = 0; virtual SnapshotPtr getSnapshot(const String & tracing_id) = 0; diff --git a/dbms/src/Storages/Page/V2/PageStorage.cpp b/dbms/src/Storages/Page/V2/PageStorage.cpp index 3ab62d55242..7a23afb11d4 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.cpp +++ b/dbms/src/Storages/Page/V2/PageStorage.cpp @@ -355,7 +355,7 @@ void PageStorage::restore() LOG_FMT_INFO(log, "{} restore {} pages, write batch sequence: {}, {}", storage_name, num_pages, write_batch_seq, statistics.toString()); } -PageId PageStorage::getMaxId(NamespaceId /*ns_id*/) +PageId PageStorage::getMaxId() { std::lock_guard write_lock(write_mutex); return versioned_page_entries.getSnapshot("")->version()->maxId(); @@ -893,9 +893,9 @@ void PageStorage::drop() struct GcContext { PageFileIdAndLevel min_file_id; - PageFile::Type min_file_type; + PageFile::Type min_file_type = PageFile::Type::Invalid; PageFileIdAndLevel max_file_id; - PageFile::Type max_file_type; + PageFile::Type max_file_type = PageFile::Type::Invalid; size_t num_page_files = 0; size_t num_legacy_files = 0; diff --git a/dbms/src/Storages/Page/V2/PageStorage.h b/dbms/src/Storages/Page/V2/PageStorage.h index cb55a769f37..b9e16fd1775 100644 --- a/dbms/src/Storages/Page/V2/PageStorage.h +++ b/dbms/src/Storages/Page/V2/PageStorage.h @@ -95,7 +95,7 @@ class PageStorage : public DB::PageStorage void drop() override; - PageId getMaxId(NamespaceId ns_id) override; + PageId getMaxId() override; PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) override; diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 06b26156529..64a3fead674 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -685,7 +685,8 @@ void VersionedPageEntries::collapseTo(const UInt64 seq, const PageIdV3Internal p *************************/ PageDirectory::PageDirectory(String storage_name, WALStorePtr && wal_, UInt64 max_persisted_log_files_) - : sequence(0) + : max_page_id(0) + , sequence(0) , wal(std::move(wal_)) , max_persisted_log_files(max_persisted_log_files_) , log(Logger::get("PageDirectory", std::move(storage_name))) @@ -923,49 +924,10 @@ PageIdV3Internal PageDirectory::getNormalPageId(PageIdV3Internal page_id, const } } -PageId PageDirectory::getMaxId(NamespaceId ns_id) const +PageId PageDirectory::getMaxId() const { std::shared_lock read_lock(table_rw_mutex); - PageIdV3Internal upper_bound = buildV3Id(ns_id, UINT64_MAX); - - auto iter = mvcc_table_directory.upper_bound(upper_bound); - if (iter == mvcc_table_directory.begin()) - { - // The smallest page id is greater than the target page id or mvcc_table_directory is empty, - // and it means no page id is less than or equal to the target page id, return 0. - return 0; - } - else - { - // iter is not at the beginning and mvcc_table_directory is not empty, - // so iter-- must be a valid iterator, and it's the largest page id which is smaller than the target page id. - iter--; - - do - { - // Can't find any entries in current ns_id - if (iter->first.high != ns_id) - { - break; - } - - // Check and return whether this id is visible, otherwise continue to check the previous one. - if (iter->second->isVisible(UINT64_MAX - 1)) - { - return iter->first.low; - } - - // Current entry/ref/external is deleted and there are no entries before it. - if (iter == mvcc_table_directory.begin()) - { - break; - } - - iter--; - } while (true); - - return 0; - } + return max_page_id; } std::set PageDirectory::getAllPageIds() @@ -1069,6 +1031,9 @@ void PageDirectory::apply(PageEntriesEdit && edit, const WriteLimiterPtr & write // stage 2, create entry version list for page_id. for (const auto & r : edit.getRecords()) { + // Protected in write_lock + max_page_id = std::max(max_page_id, r.page_id.low); + auto [iter, created] = mvcc_table_directory.insert(std::make_pair(r.page_id, nullptr)); if (created) { diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index 14833245c7a..a3c6b079fee 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -347,7 +347,7 @@ class PageDirectory } #endif - PageId getMaxId(NamespaceId ns_id) const; + PageId getMaxId() const; std::set getAllPageIds(); @@ -397,6 +397,7 @@ class PageDirectory } private: + PageId max_page_id; std::atomic sequence; mutable std::shared_mutex table_rw_mutex; MVCCMapType mvcc_table_directory; diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 40b12b64f06..0592d1ddaa8 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -40,6 +40,7 @@ PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderP // After restoring from the disk, we need cleanup all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore. dir->gcInMemEntries(); + LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory"), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); if (blob_stats) { @@ -111,7 +112,6 @@ void PageDirectoryFactory::loadEdit(const PageDirectoryPtr & dir, const PageEntr { if (max_applied_ver < r.version) max_applied_ver = r.version; - max_applied_page_id = std::max(r.page_id, max_applied_page_id); // We can not avoid page id from being reused under some corner situation. Try to do gcInMemEntries // and apply again to resolve the error. @@ -135,6 +135,8 @@ bool PageDirectoryFactory::applyRecord( iter->second = std::make_shared(); } + dir->max_page_id = std::max(dir->max_page_id, r.page_id.low); + const auto & version_list = iter->second; const auto & restored_version = r.version; try diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h index 11337e4a6cc..e4b76bfba0d 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h @@ -38,7 +38,6 @@ class PageDirectoryFactory { public: PageVersion max_applied_ver; - PageIdV3Internal max_applied_page_id; PageDirectoryFactory & setBlobStore(BlobStore & blob_store) { diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index 8aa9f92675c..58fe4b4dd4c 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -55,9 +55,9 @@ void PageStorageImpl::restore() .create(storage_name, file_provider, delegator, parseWALConfig(config)); } -PageId PageStorageImpl::getMaxId(NamespaceId ns_id) +PageId PageStorageImpl::getMaxId() { - return page_directory->getMaxId(ns_id); + return page_directory->getMaxId(); } void PageStorageImpl::drop() diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index f3b696d0351..082adb8df34 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -64,7 +64,7 @@ class PageStorageImpl : public DB::PageStorage void drop() override; - PageId getMaxId(NamespaceId ns_id) override; + PageId getMaxId() override; PageId getNormalPageIdImpl(NamespaceId ns_id, PageId page_id, SnapshotPtr snapshot, bool throw_on_not_exist) override; diff --git a/dbms/src/Storages/Page/V3/WAL/serialize.cpp b/dbms/src/Storages/Page/V3/WAL/serialize.cpp index f8e26617499..6b7bc9b8a21 100644 --- a/dbms/src/Storages/Page/V3/WAL/serialize.cpp +++ b/dbms/src/Storages/Page/V3/WAL/serialize.cpp @@ -218,7 +218,7 @@ void deserializeFrom(ReadBuffer & buf, PageEntriesEdit & edit) break; } default: - throw Exception(fmt::format("Unknown record type: {}", record_type)); + throw Exception(fmt::format("Unknown record type: {}", record_type), ErrorCodes::LOGICAL_ERROR); } } } @@ -261,7 +261,7 @@ PageEntriesEdit deserializeFrom(std::string_view record) UInt32 version = 0; readIntBinary(version, buf); if (version != 1) - throw Exception(""); + throw Exception(fmt::format("Unknown version for PageEntriesEdit deser [version={}]", version), ErrorCodes::LOGICAL_ERROR); deserializeFrom(buf, edit); return edit; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 4511cc8ddd7..dfa33824473 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2399,117 +2399,6 @@ try } CATCH -TEST_F(PageDirectoryTest, GetMaxId) -try -{ - NamespaceId small = 20; - NamespaceId medium = 50; - NamespaceId large = 100; - ASSERT_EQ(dir->getMaxId(small), 0); - ASSERT_EQ(dir->getMaxId(medium), 0); - ASSERT_EQ(dir->getMaxId(large), 0); - - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - { - PageEntriesEdit edit; - edit.put(buildV3Id(small, 1), entry1); - edit.put(buildV3Id(large, 2), entry2); - dir->apply(std::move(edit)); - ASSERT_EQ(dir->getMaxId(small), 1); - ASSERT_EQ(dir->getMaxId(medium), 0); - ASSERT_EQ(dir->getMaxId(large), 2); - } - - PageEntryV3 entry3{.file_id = 3, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry4{.file_id = 4, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - { - PageEntriesEdit edit; - edit.put(buildV3Id(medium, 300), entry1); - edit.put(buildV3Id(medium, 320), entry2); - dir->apply(std::move(edit)); - ASSERT_EQ(dir->getMaxId(small), 1); - ASSERT_EQ(dir->getMaxId(medium), 320); - ASSERT_EQ(dir->getMaxId(large), 2); - } - - { - PageEntriesEdit edit; - edit.del(buildV3Id(medium, 320)); - dir->apply(std::move(edit)); - ASSERT_EQ(dir->getMaxId(medium), 300); - } - - { - PageEntriesEdit edit; - edit.del(buildV3Id(medium, 300)); - dir->apply(std::move(edit)); - ASSERT_EQ(dir->getMaxId(medium), 0); - } -} -CATCH - -TEST_F(PageDirectoryTest, GetMaxIdAfterDelete) -try -{ - /// test for deleting put - PageEntryV3 entry1{.file_id = 1, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry2{.file_id = 2, .size = 1024, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - { - PageEntriesEdit edit; - edit.put(1, entry1); - edit.put(2, entry2); - dir->apply(std::move(edit)); - } - - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 2); - - { - PageEntriesEdit edit; - edit.del(2); - dir->apply(std::move(edit)); - } - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 1); - - { - PageEntriesEdit edit; - edit.del(1); - dir->apply(std::move(edit)); - } - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); - - dir->gcInMemEntries(); - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); - - /// test for deleting put_ext/ref - - { - PageEntriesEdit edit; - edit.putExternal(1); - edit.ref(2, 1); - dir->apply(std::move(edit)); - } - - { - PageEntriesEdit edit; - edit.del(1); - dir->apply(std::move(edit)); - } - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 2); - dir->gcInMemEntries(); - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 2); - - { - PageEntriesEdit edit; - edit.del(2); - dir->apply(std::move(edit)); - } - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); - dir->gcInMemEntries(); - ASSERT_EQ(dir->getMaxId(TEST_NAMESPACE_ID), 0); -} -CATCH - #undef INSERT_ENTRY_TO #undef INSERT_ENTRY #undef INSERT_ENTRY_ACQ_SNAP diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 91dfcaac6a8..498fd4124e5 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1329,5 +1329,56 @@ try } CATCH +TEST_F(PageStorageTest, GetMaxId) +try +{ + NamespaceId small = 20; + NamespaceId medium = 50; + NamespaceId large = 100; + + { + WriteBatch batch{small}; + batch.putExternal(1, 0); + batch.putExternal(1999, 0); + batch.putExternal(2000, 0); + page_storage->write(std::move(batch)); + ASSERT_EQ(page_storage->getMaxId(), 2000); + } + + { + page_storage = reopenWithConfig(config); + ASSERT_EQ(page_storage->getMaxId(), 2000); + } + + { + WriteBatch batch{medium}; + batch.putExternal(1, 0); + batch.putExternal(100, 0); + batch.putExternal(200, 0); + page_storage->write(std::move(batch)); + ASSERT_EQ(page_storage->getMaxId(), 2000); + } + + { + page_storage = reopenWithConfig(config); + ASSERT_EQ(page_storage->getMaxId(), 2000); + } + + { + WriteBatch batch{large}; + batch.putExternal(1, 0); + batch.putExternal(20000, 0); + batch.putExternal(20001, 0); + page_storage->write(std::move(batch)); + ASSERT_EQ(page_storage->getMaxId(), 20001); + } + + { + page_storage = reopenWithConfig(config); + ASSERT_EQ(page_storage->getMaxId(), 20001); + } +} +CATCH + } // namespace PS::V3::tests } // namespace DB From 350323d36167e30c85dac7f399fd920334fa13d3 Mon Sep 17 00:00:00 2001 From: Zhuhe Fang Date: Mon, 23 May 2022 19:40:46 +0800 Subject: [PATCH 3/7] Schedule: track the waiting tasks with task ID, and deleted the scheduled task with exceeded state from the waiting tasks queue (#4958) close pingcap/tiflash#4954 --- dbms/src/Flash/Mpp/MPPTaskManager.h | 2 +- dbms/src/Flash/Mpp/MinTSOScheduler.cpp | 22 ++++++++++++++++------ dbms/src/Flash/Mpp/MinTSOScheduler.h | 2 +- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/dbms/src/Flash/Mpp/MPPTaskManager.h b/dbms/src/Flash/Mpp/MPPTaskManager.h index 024dd4f3a59..d7047804aca 100644 --- a/dbms/src/Flash/Mpp/MPPTaskManager.h +++ b/dbms/src/Flash/Mpp/MPPTaskManager.h @@ -35,7 +35,7 @@ struct MPPQueryTaskSet bool to_be_cancelled = false; MPPTaskMap task_map; /// only used in scheduler - std::queue waiting_tasks; + std::queue waiting_tasks; }; using MPPQueryTaskSetPtr = std::shared_ptr; diff --git a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp index 9fbe4b7b7cb..af525bd1a55 100644 --- a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp +++ b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp @@ -72,7 +72,8 @@ bool MinTSOScheduler::tryToSchedule(const MPPTaskPtr & task, MPPTaskManager & ta LOG_FMT_WARNING(log, "{} is scheduled with miss or cancellation.", id.toString()); return true; } - return scheduleImp(id.start_ts, query_task_set, task, false); + bool has_error = false; + return scheduleImp(id.start_ts, query_task_set, task, false, has_error); } /// after finishing the query, there would be no threads released soon, so the updated min-tso query with waiting tasks should be scheduled. @@ -97,7 +98,9 @@ void MinTSOScheduler::deleteQuery(const UInt64 tso, MPPTaskManager & task_manage { while (!query_task_set->waiting_tasks.empty()) { - query_task_set->waiting_tasks.front()->scheduleThisTask(MPPTask::ScheduleState::FAILED); + auto task_it = query_task_set->task_map.find(query_task_set->waiting_tasks.front()); + if (task_it != query_task_set->task_map.end() && task_it->second != nullptr) + task_it->second->scheduleThisTask(MPPTask::ScheduleState::FAILED); query_task_set->waiting_tasks.pop(); GET_METRIC(tiflash_task_scheduler, type_waiting_tasks_count).Decrement(); } @@ -153,9 +156,14 @@ void MinTSOScheduler::scheduleWaitingQueries(MPPTaskManager & task_manager) /// schedule tasks one by one while (!query_task_set->waiting_tasks.empty()) { - auto task = query_task_set->waiting_tasks.front(); - if (!scheduleImp(current_query_id, query_task_set, task, true)) + auto task_it = query_task_set->task_map.find(query_task_set->waiting_tasks.front()); + bool has_error = false; + if (task_it != query_task_set->task_map.end() && task_it->second != nullptr && !scheduleImp(current_query_id, query_task_set, task_it->second, true, has_error)) + { + if (has_error) + query_task_set->waiting_tasks.pop(); /// it should be pop from the waiting queue, because the task is scheduled with errors. return; + } query_task_set->waiting_tasks.pop(); GET_METRIC(tiflash_task_scheduler, type_waiting_tasks_count).Decrement(); } @@ -166,7 +174,7 @@ void MinTSOScheduler::scheduleWaitingQueries(MPPTaskManager & task_manager) } /// [directly schedule, from waiting set] * [is min_tso query, not] * [can schedule, can't] totally 8 cases. -bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & query_task_set, const MPPTaskPtr & task, const bool isWaiting) +bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & query_task_set, const MPPTaskPtr & task, const bool isWaiting, bool & has_error) { auto needed_threads = task->getNeededThreads(); auto check_for_new_min_tso = tso <= min_tso && estimated_thread_usage + needed_threads <= thread_hard_limit; @@ -187,6 +195,7 @@ bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & q { if (tso <= min_tso) /// the min_tso query should fully run, otherwise throw errors here. { + has_error = true; auto msg = fmt::format("threads are unavailable for the query {} ({} min_tso {}) {}, need {}, but used {} of the thread hard limit {}, {} active and {} waiting queries.", tso, tso == min_tso ? "is" : "is newer than", min_tso, isWaiting ? "from the waiting set" : "when directly schedule it", needed_threads, estimated_thread_usage, thread_hard_limit, active_set.size(), waiting_set.size()); LOG_FMT_ERROR(log, "{}", msg); GET_METRIC(tiflash_task_scheduler, type_hard_limit_exceeded_count).Increment(); @@ -200,11 +209,12 @@ bool MinTSOScheduler::scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & q { throw Exception(msg); } + return false; } if (!isWaiting) { waiting_set.insert(tso); - query_task_set->waiting_tasks.push(task); + query_task_set->waiting_tasks.push(task->getId()); GET_METRIC(tiflash_task_scheduler, type_waiting_queries_count).Set(waiting_set.size()); GET_METRIC(tiflash_task_scheduler, type_waiting_tasks_count).Increment(); } diff --git a/dbms/src/Flash/Mpp/MinTSOScheduler.h b/dbms/src/Flash/Mpp/MinTSOScheduler.h index 501aa772a33..17ab1f4dfa3 100644 --- a/dbms/src/Flash/Mpp/MinTSOScheduler.h +++ b/dbms/src/Flash/Mpp/MinTSOScheduler.h @@ -42,7 +42,7 @@ class MinTSOScheduler : private boost::noncopyable void releaseThreadsThenSchedule(const int needed_threads, MPPTaskManager & task_manager); private: - bool scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & query_task_set, const MPPTaskPtr & task, const bool isWaiting); + bool scheduleImp(const UInt64 tso, const MPPQueryTaskSetPtr & query_task_set, const MPPTaskPtr & task, const bool isWaiting, bool & has_error); bool updateMinTSO(const UInt64 tso, const bool retired, const String msg); void scheduleWaitingQueries(MPPTaskManager & task_manager); bool isDisabled() From d1975b8bcbc8531b4f28033924ed05e9e2793d3d Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Mon, 23 May 2022 20:08:46 +0800 Subject: [PATCH 4/7] Enable LTO in release build process (#4924) ref pingcap/tiflash#4909 --- dbms/src/Common/TiFlashBuildInfo.cpp | 5 +++ .../include/common/config_common.h.in | 1 + .../scripts/build-tiflash-release.sh | 38 +++++++++++-------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/TiFlashBuildInfo.cpp b/dbms/src/Common/TiFlashBuildInfo.cpp index b18fb63d93e..ff46e04384d 100644 --- a/dbms/src/Common/TiFlashBuildInfo.cpp +++ b/dbms/src/Common/TiFlashBuildInfo.cpp @@ -95,6 +95,11 @@ std::string getEnabledFeatures() #else "unwind", #endif +#endif + +// THINLTO +#if ENABLE_THINLTO + "thinlto", #endif }; return fmt::format("{}", fmt::join(features.begin(), features.end(), " ")); diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index e11ec418e7c..46f167ea683 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -13,3 +13,4 @@ #cmakedefine01 ENABLE_FAILPOINTS #cmakedefine01 USE_UNWIND #cmakedefine01 USE_LLVM_LIBUNWIND +#cmakedefine01 ENABLE_THINLTO diff --git a/release-centos7-llvm/scripts/build-tiflash-release.sh b/release-centos7-llvm/scripts/build-tiflash-release.sh index d3eda1dff4e..bb62e4743f6 100755 --- a/release-centos7-llvm/scripts/build-tiflash-release.sh +++ b/release-centos7-llvm/scripts/build-tiflash-release.sh @@ -13,12 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. - CMAKE_BUILD_TYPE=$1 CMAKE_PREFIX_PATH=$2 if [[ -z ${CMAKE_BUILD_TYPE} ]]; then - CMAKE_BUILD_TYPE="RELWITHDEBINFO" + CMAKE_BUILD_TYPE="RELWITHDEBINFO" fi DEFAULT_CMAKE_PREFIX_PATH="/usr/local/" @@ -27,15 +26,22 @@ DEFINE_CMAKE_PREFIX_PATH="-DCMAKE_PREFIX_PATH=${DEFAULT_CMAKE_PREFIX_PATH}" # https://cmake.org/cmake/help/latest/variable/CMAKE_PREFIX_PATH.html # CMAKE_PREFIX_PATH should be semicolon-separated list if [[ ${CMAKE_PREFIX_PATH} ]]; then - DEFINE_CMAKE_PREFIX_PATH="-DCMAKE_PREFIX_PATH=\"${DEFAULT_CMAKE_PREFIX_PATH};${CMAKE_PREFIX_PATH}\"" - echo "CMAKE_PREFIX_PATH is \"${DEFAULT_CMAKE_PREFIX_PATH};${CMAKE_PREFIX_PATH}\"" + DEFINE_CMAKE_PREFIX_PATH="-DCMAKE_PREFIX_PATH=\"${DEFAULT_CMAKE_PREFIX_PATH};${CMAKE_PREFIX_PATH}\"" + echo "CMAKE_PREFIX_PATH is \"${DEFAULT_CMAKE_PREFIX_PATH};${CMAKE_PREFIX_PATH}\"" fi set -ueox pipefail -SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -SRCPATH=$(cd ${SCRIPTPATH}/../..; pwd -P) +SCRIPTPATH="$( + cd "$(dirname "$0")" + pwd -P +)" +SRCPATH=$( + cd ${SCRIPTPATH}/../.. + pwd -P +) NPROC=${NPROC:-$(nproc || grep -c ^processor /proc/cpuinfo)} +ENABLE_THINLTO=${ENABLE_THINLTO:-ON} INSTALL_DIR="${SRCPATH}/release-centos7-llvm/tiflash" rm -rf ${INSTALL_DIR} && mkdir -p ${INSTALL_DIR} @@ -43,14 +49,17 @@ rm -rf ${INSTALL_DIR} && mkdir -p ${INSTALL_DIR} BUILD_DIR="${SRCPATH}/release-centos7-llvm/build-release" rm -rf ${BUILD_DIR} && mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} -cmake "${SRCPATH}" ${DEFINE_CMAKE_PREFIX_PATH} \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ - -DENABLE_TESTING=OFF \ - -DENABLE_TESTS=OFF \ - -Wno-dev \ - -DUSE_CCACHE=OFF \ - -DRUN_HAVE_STD_REGEX=0 \ - -GNinja +cmake -S "${SRCPATH}" \ + ${DEFINE_CMAKE_PREFIX_PATH} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DENABLE_TESTING=OFF \ + -DENABLE_TESTS=OFF \ + -Wno-dev \ + -DUSE_CCACHE=OFF \ + -DRUN_HAVE_STD_REGEX=0 \ + -DENABLE_THINLTO=${ENABLE_THINLTO} \ + -DTHINLTO_JOBS=${NPROC} \ + -GNinja cmake --build . --target tiflash --parallel ${NPROC} cmake --install . --component=tiflash-release --prefix="${INSTALL_DIR}" @@ -59,4 +68,3 @@ cmake --install . --component=tiflash-release --prefix="${INSTALL_DIR}" unset LD_LIBRARY_PATH readelf -d "${INSTALL_DIR}/tiflash" ldd "${INSTALL_DIR}/tiflash" - From 44f099934f2ab0bff83b614eb3332dc29feafd9d Mon Sep 17 00:00:00 2001 From: hehechen Date: Tue, 24 May 2022 12:20:46 +0800 Subject: [PATCH 5/7] Revert the retry applyRecord when loadEdit (#4938) close pingcap/tiflash#4937 --- .../Storages/Page/V3/PageDirectoryFactory.cpp | 22 +-- .../Storages/Page/V3/PageDirectoryFactory.h | 5 +- .../Page/V3/tests/gtest_page_directory.cpp | 185 ------------------ 3 files changed, 6 insertions(+), 206 deletions(-) diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 0592d1ddaa8..9d20e0a64ab 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -113,21 +113,13 @@ void PageDirectoryFactory::loadEdit(const PageDirectoryPtr & dir, const PageEntr if (max_applied_ver < r.version) max_applied_ver = r.version; - // We can not avoid page id from being reused under some corner situation. Try to do gcInMemEntries - // and apply again to resolve the error. - if (bool ok = applyRecord(dir, r, /*throw_on_error*/ false); unlikely(!ok)) - { - dir->gcInMemEntries(); - applyRecord(dir, r, /*throw_on_error*/ true); - LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory"), "resolve from error status done, continue to restore"); - } + applyRecord(dir, r); } } -bool PageDirectoryFactory::applyRecord( +void PageDirectoryFactory::applyRecord( const PageDirectoryPtr & dir, - const PageEntriesEdit::EditRecord & r, - bool throw_on_error) + const PageEntriesEdit::EditRecord & r) { auto [iter, created] = dir->mvcc_table_directory.insert(std::make_pair(r.page_id, nullptr)); if (created) @@ -189,14 +181,8 @@ bool PageDirectoryFactory::applyRecord( catch (DB::Exception & e) { e.addMessage(fmt::format(" [type={}] [page_id={}] [ver={}]", r.type, r.page_id, restored_version)); - if (throw_on_error || e.code() != ErrorCodes::PS_DIR_APPLY_INVALID_STATUS) - { - throw e; - } - LOG_FMT_WARNING(DB::Logger::get("PageDirectoryFactory"), "try to resolve error during restore: {}", e.message()); - return false; + throw e; } - return true; } void PageDirectoryFactory::loadFromDisk(const PageDirectoryPtr & dir, WALStoreReaderPtr && reader) diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h index e4b76bfba0d..185e8fd19a5 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h @@ -60,10 +60,9 @@ class PageDirectoryFactory private: void loadFromDisk(const PageDirectoryPtr & dir, WALStoreReaderPtr && reader); void loadEdit(const PageDirectoryPtr & dir, const PageEntriesEdit & edit); - static bool applyRecord( + static void applyRecord( const PageDirectoryPtr & dir, - const PageEntriesEdit::EditRecord & r, - bool throw_on_error); + const PageEntriesEdit::EditRecord & r); BlobStore::BlobStats * blob_stats = nullptr; }; diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index dfa33824473..151b3b50657 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -2214,191 +2214,6 @@ try } CATCH -TEST_F(PageDirectoryGCTest, RestoreWithDuplicateID) -try -{ - auto restore_from_edit = [](const PageEntriesEdit & edit) { - auto ctx = ::DB::tests::TiFlashTestEnv::getContext(); - auto provider = ctx.getFileProvider(); - auto path = getTemporaryPath(); - PSDiskDelegatorPtr delegator = std::make_shared(path); - PageDirectoryFactory factory; - auto d = factory.createFromEdit(getCurrentTestName(), provider, delegator, edit); - return d; - }; - - const PageId target_id = 100; - // ========= 1 =======// - // Reuse same id: PUT_EXT/DEL/REF - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.putExternal(target_id); - edit.del(target_id); - // restart and reuse id=100 as ref to replace put_ext - edit.ref(target_id, 50); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - ASSERT_EQ(restored_dir->getNormalPageId(target_id, snap).low, 50); - } - // Reuse same id: PUT_EXT/DEL/PUT - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_100{.file_id = 100, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.putExternal(target_id); - edit.del(target_id); - // restart and reuse id=100 as put to replace put_ext - edit.put(target_id, entry_100); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - ASSERT_SAME_ENTRY(restored_dir->get(target_id, snap).second, entry_100); - } - - // ========= 1-invalid =======// - // Reuse same id: PUT_EXT/BEING REF/DEL/REF - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.putExternal(target_id); - edit.ref(101, target_id); - edit.del(target_id); - // restart and reuse id=100 as ref. Should not happen because 101 still ref to 100 - edit.ref(target_id, 50); - - ASSERT_THROW(restore_from_edit(edit);, DB::Exception); - } - // Reuse same id: PUT_EXT/BEING REF/DEL/PUT - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_100{.file_id = 100, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.putExternal(target_id); - edit.ref(101, target_id); - edit.del(target_id); - // restart and reuse id=100 as put. Should not happen because 101 still ref to 100 - edit.put(target_id, entry_100); - - ASSERT_THROW(restore_from_edit(edit);, DB::Exception); - } - - // ========= 2 =======// - // Reuse same id: PUT/DEL/REF - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.put(target_id, entry_50); - edit.del(target_id); - // restart and reuse id=100 as ref to replace put - edit.ref(target_id, 50); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - ASSERT_EQ(restored_dir->getNormalPageId(target_id, snap).low, 50); - } - // Reuse same id: PUT/DEL/PUT_EXT - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.put(target_id, entry_50); - edit.del(target_id); - // restart and reuse id=100 as external to replace put - edit.putExternal(target_id); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - auto ext_ids = restored_dir->getAliveExternalIds(TEST_NAMESPACE_ID); - ASSERT_EQ(ext_ids.size(), 1); - ASSERT_EQ(*ext_ids.begin(), target_id); - } - - // ========= 2-invalid =======// - // Reuse same id: PUT/BEING REF/DEL/REF - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.put(target_id, entry_50); - edit.ref(101, target_id); - edit.del(target_id); - // restart and reuse id=100 as ref to replace put - edit.ref(target_id, 50); - - ASSERT_THROW(restore_from_edit(edit);, DB::Exception); - } - // Reuse same id: PUT/BEING REF/DEL/PUT_EXT - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.put(target_id, entry_50); - edit.ref(101, target_id); - edit.del(target_id); - // restart and reuse id=100 as external to replace put - edit.putExternal(target_id); - - ASSERT_THROW(restore_from_edit(edit);, DB::Exception); - } - - // ========= 3 =======// - // Reuse same id: REF/DEL/PUT - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_100{.file_id = 100, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.ref(target_id, 50); - edit.del(target_id); - // restart and reuse id=100 as put to replace ref - edit.put(target_id, entry_100); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - ASSERT_SAME_ENTRY(restored_dir->get(target_id, snap).second, entry_100); - } - // Reuse same id: REF/DEL/PUT_EXT - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.ref(target_id, 50); - edit.del(target_id); - // restart and reuse id=100 as external to replace ref - edit.putExternal(target_id); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - auto ext_ids = restored_dir->getAliveExternalIds(TEST_NAMESPACE_ID); - ASSERT_EQ(ext_ids.size(), 1); - ASSERT_EQ(*ext_ids.begin(), target_id); - } - // Reuse same id: REF/DEL/REF another id - { - PageEntryV3 entry_50{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntryV3 entry_51{.file_id = 2, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; - PageEntriesEdit edit; - edit.put(50, entry_50); - edit.put(51, entry_51); - edit.ref(target_id, 50); - edit.del(target_id); - // restart and reuse id=target_id as external to replace put - edit.ref(target_id, 51); - - auto restored_dir = restore_from_edit(edit); - auto snap = restored_dir->createSnapshot(); - ASSERT_EQ(restored_dir->getNormalPageId(target_id, snap).low, 51); - } -} -CATCH - #undef INSERT_ENTRY_TO #undef INSERT_ENTRY #undef INSERT_ENTRY_ACQ_SNAP From 4448bacb2f4b4af159eaf91a09aa87f7f5d66ce6 Mon Sep 17 00:00:00 2001 From: Zhigao Tong Date: Tue, 24 May 2022 13:56:46 +0800 Subject: [PATCH 6/7] Remove useless template instantiations (#4978) ref pingcap/tiflash#4909 --- .../src/Functions/FunctionsTiDBConversion.cpp | 15 ++ dbms/src/Functions/FunctionsTiDBConversion.h | 20 +-- .../Functions/tests/gtest_tidb_conversion.cpp | 146 +++++++++--------- 3 files changed, 94 insertions(+), 87 deletions(-) diff --git a/dbms/src/Functions/FunctionsTiDBConversion.cpp b/dbms/src/Functions/FunctionsTiDBConversion.cpp index 75c015c4bad..74daca2b7fe 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.cpp +++ b/dbms/src/Functions/FunctionsTiDBConversion.cpp @@ -46,4 +46,19 @@ void registerFunctionsTiDBConversion(FunctionFactory & factory) factory.registerFunction(); } +FunctionBasePtr FunctionBuilderTiDBCast::buildImpl( + const ColumnsWithTypeAndName & arguments, + const DataTypePtr & return_type, + const TiDB::TiDBCollatorPtr &) const +{ + DataTypes data_types(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get()); + return std::make_shared>(context, name, std::move(monotonicity), data_types, return_type, in_union, tidb_tp); +} + + } // namespace DB diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index 30251aac36d..bcd7856ee71 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -1743,6 +1743,7 @@ inline bool numberToDateTime(Int64 number, MyDateTime & result, DAGContext * ctx return getDatetime(number, result, ctx); } +template class ExecutableFunctionTiDBCast : public IExecutableFunction { public: @@ -1782,13 +1783,15 @@ class ExecutableFunctionTiDBCast : public IExecutableFunction const Context & context; }; +using MonotonicityForRange = std::function; + /// FunctionTiDBCast implements SQL cast function in TiDB /// The basic idea is to dispatch according to combinations of parameter types +template class FunctionTiDBCast final : public IFunctionBase { public: using WrapperType = std::function; - using MonotonicityForRange = std::function; FunctionTiDBCast(const Context & context, const char * name, MonotonicityForRange && monotonicity_for_range, const DataTypes & argument_types, const DataTypePtr & return_type, bool in_union_, const tipb::FieldType & tidb_tp_) : context(context) @@ -1805,7 +1808,7 @@ class FunctionTiDBCast final : public IFunctionBase ExecutableFunctionPtr prepare(const Block & /*sample_block*/) const override { - return std::make_shared( + return std::make_shared>( prepare(getArgumentTypes()[0], getReturnType()), name, in_union, @@ -2341,8 +2344,6 @@ class FunctionTiDBCast final : public IFunctionBase class FunctionBuilderTiDBCast : public IFunctionBuilder { public: - using MonotonicityForRange = FunctionTiDBCast::MonotonicityForRange; - static constexpr auto name = "tidb_cast"; static FunctionBuilderPtr create(const Context & context) { @@ -2369,16 +2370,7 @@ class FunctionBuilderTiDBCast : public IFunctionBuilder FunctionBasePtr buildImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, - const TiDB::TiDBCollatorPtr &) const override - { - DataTypes data_types(arguments.size()); - - for (size_t i = 0; i < arguments.size(); ++i) - data_types[i] = arguments[i].type; - - auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_shared(context, name, std::move(monotonicity), data_types, return_type, in_union, tidb_tp); - } + const TiDB::TiDBCollatorPtr &) const override; // use the last const string column's value as the return type name, in string representation like "Float64" DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index d67ef49e108..5f885c2716f 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -1474,76 +1474,76 @@ TEST_F(TestTidbConversion, skipCheckOverflowIntToDeciaml) const ScaleType scale = 0; // int8(max_prec: 3) -> decimal32(max_prec: 9) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal32, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal32, scale)); // int16(max_prec: 5) -> decimal32(max_prec: 9) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal32, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal32, scale)); // int32(max_prec: 10) -> decimal32(max_prec: 9) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal32, scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal32, scale)); // int64(max_prec: 20) -> decimal32(max_prec: 9) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal32, scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal32, scale)); // uint8(max_prec: 3) -> decimal32(max_prec: 9) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal32, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal32, scale)); // uint16(max_prec: 5) -> decimal32(max_prec: 9) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal32, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal32, scale)); // uint32(max_prec: 10) -> decimal32(max_prec: 9) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal32, scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal32, scale)); // uint64(max_prec: 20) -> decimal32(max_prec: 9) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal32, scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal32, scale)); // int8(max_prec: 3) -> decimal64(max_prec: 18) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal64, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal64, scale)); // int16(max_prec: 5) -> decimal64(max_prec: 18) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal64, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal64, scale)); // int32(max_prec: 10) -> decimal64(max_prec: 18) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal64, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal64, scale)); // int64(max_prec: 20) -> decimal64(max_prec: 18) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal64, scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal64, scale)); // uint8(max_prec: 3) -> decimal64(max_prec: 18) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal64, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal64, scale)); // uint16(max_prec: 5) -> decimal64(max_prec: 18) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal64, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal64, scale)); // uint32(max_prec: 10) -> decimal64(max_prec: 18) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal64, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal64, scale)); // uint64(max_prec: 20) -> decimal64(max_prec: 18) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal64, scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal64, scale)); // int8(max_prec: 3) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal128, scale)); // int16(max_prec: 5) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal128, scale)); // int32(max_prec: 10) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal128, scale)); // int64(max_prec: 20) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal128, scale)); // uint8(max_prec: 3) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal128, scale)); // uint16(max_prec: 5) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal128, scale)); // uint32(max_prec: 10) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal128, scale)); // uint64(max_prec: 20) -> decimal128(max_prec: 38) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal128, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal128, scale)); // int8(max_prec: 3) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int8_ptr, prec_decimal256, scale)); // int16(max_prec: 5) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int16_ptr, prec_decimal256, scale)); // int32(max_prec: 10) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int32_ptr, prec_decimal256, scale)); // int64(max_prec: 20) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int64_ptr, prec_decimal256, scale)); // uint8(max_prec: 3) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint8_ptr, prec_decimal256, scale)); // uint16(max_prec: 5) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint16_ptr, prec_decimal256, scale)); // uint32(max_prec: 10) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint32_ptr, prec_decimal256, scale)); // uint64(max_prec: 20) -> decimal256(max_prec: 65) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal256, scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(uint64_ptr, prec_decimal256, scale)); } TEST_F(TestTidbConversion, skipCheckOverflowDecimalToDeciaml) @@ -1551,24 +1551,24 @@ TEST_F(TestTidbConversion, skipCheckOverflowDecimalToDeciaml) DataTypePtr decimal32_ptr_8_3 = createDecimal(8, 3); DataTypePtr decimal32_ptr_8_2 = createDecimal(8, 2); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 8, 3)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_8_3, 8, 2)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 7, 5)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 8, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_8_3, 8, 2)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 7, 5)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 9, 3)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 9, 1)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 9, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_8_2, 9, 1)); DataTypePtr decimal32_ptr_6_4 = createDecimal(6, 4); // decimal(6, 4) -> decimal(5, 3) // because select cast(99.9999 as decimal(5, 3)); -> 100.000 is greater than 99.999. - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 5, 3)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 5, 3)); // decimal(6, 4) -> decimal(7, 5) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 7, 5)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 7, 5)); // decimal(6, 4) -> decimal(6, 5) - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 6, 5)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 6, 5)); // decimal(6, 4) -> decimal(8, 5) - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 8, 5)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(decimal32_ptr_6_4, 8, 5)); } TEST_F(TestTidbConversion, skipCheckOverflowEnumToDecimal) @@ -1583,15 +1583,15 @@ TEST_F(TestTidbConversion, skipCheckOverflowEnumToDecimal) enum16_values.push_back({"b1", 2000}); DataTypePtr enum16_ptr = std::make_shared(enum16_values); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum8_ptr, 3, 0)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum8_ptr, 4, 1)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum8_ptr, 2, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum8_ptr, 4, 2)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum8_ptr, 3, 0)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum8_ptr, 4, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum8_ptr, 2, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum8_ptr, 4, 2)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum16_ptr, 5, 0)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum16_ptr, 6, 1)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum16_ptr, 4, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(enum16_ptr, 6, 2)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum16_ptr, 5, 0)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum16_ptr, 6, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum16_ptr, 4, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(enum16_ptr, 6, 2)); } TEST_F(TestTidbConversion, skipCheckOverflowMyDateTimeToDeciaml) @@ -1600,18 +1600,18 @@ TEST_F(TestTidbConversion, skipCheckOverflowMyDateTimeToDeciaml) DataTypePtr datetime_ptr_fsp_5 = std::make_shared(5); // rule for no fsp: 14 + to_scale <= to_prec. - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 5, 3)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 18, 3)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 17, 3)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 18, 4)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 14, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 14, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 5, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 18, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 17, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 18, 4)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 14, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_no_fsp, 14, 1)); // rule for fsp: 20 + scale_diff <= to_prec. // 20 + (3 - 6 + 1) = 18 - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_fsp_5, 19, 3)); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_fsp_5, 18, 3)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(datetime_ptr_fsp_5, 17, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_fsp_5, 19, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_fsp_5, 18, 3)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(datetime_ptr_fsp_5, 17, 3)); } TEST_F(TestTidbConversion, skipCheckOverflowMyDateToDeciaml) @@ -1619,30 +1619,30 @@ TEST_F(TestTidbConversion, skipCheckOverflowMyDateToDeciaml) DataTypePtr date_ptr = std::make_shared(); // rule: 8 + to_scale <= to_prec. - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(date_ptr, 11, 3)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(date_ptr, 11, 4)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(date_ptr, 10, 3)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(date_ptr, 11, 3)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(date_ptr, 11, 4)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(date_ptr, 10, 3)); } TEST_F(TestTidbConversion, skipCheckOverflowOtherToDecimal) { // float and string not support skip overflow check. DataTypePtr string_ptr = std::make_shared(); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(string_ptr, 1, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(string_ptr, 60, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(string_ptr, 1, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(string_ptr, 60, 1)); DataTypePtr float32_ptr = std::make_shared(); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(float32_ptr, 1, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(float32_ptr, 60, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(float32_ptr, 1, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(float32_ptr, 60, 1)); DataTypePtr float64_ptr = std::make_shared(); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(float64_ptr, 1, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(float64_ptr, 60, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(float64_ptr, 1, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(float64_ptr, 60, 1)); // cast duration to decimal is not supported to push down to tiflash for now. DataTypePtr duration_ptr = std::make_shared(); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(duration_ptr, 1, 0)); - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(duration_ptr, 60, 1)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(duration_ptr, 1, 0)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(duration_ptr, 60, 1)); } // check if template argument of CastInternalType is correct or not. @@ -1654,7 +1654,7 @@ try ScaleType to_scale = 3; DataTypePtr int8_ptr = std::make_shared(); // from_prec(3) + to_scale(3) <= Decimal32::prec(9), so we **CAN** skip check overflow. - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int8_ptr, to_prec, to_scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int8_ptr, to_prec, to_scale)); // from_prec(3) + to_scale(3) <= Int32::real_prec(10) - 1, so CastInternalType should be **Int32**. ASSERT_COLUMN_EQ( @@ -1669,7 +1669,7 @@ try to_prec = 9; to_scale = 7; // from_prec(3) + to_scale(7) > Decimal32::prec(9), so we **CANNOT** skip check overflow. - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int8_ptr, to_prec, to_scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int8_ptr, to_prec, to_scale)); // from_prec(3) + to_scale(7) > Int32::real_prec(10) - 1, so CastInternalType should be **Int64**. DAGContext * dag_context = context.getDAGContext(); @@ -1690,7 +1690,7 @@ try to_prec = 40; to_scale = 20; DataTypePtr int64_ptr = std::make_shared(); - ASSERT_TRUE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int64_ptr, to_prec, to_scale)); + ASSERT_TRUE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int64_ptr, to_prec, to_scale)); // from_prec(19) + to_scale(20) > Int128::real_prec(39) - 1, so CastInternalType should be **Int256**. ASSERT_COLUMN_EQ( @@ -1705,7 +1705,7 @@ try // from_prec(19) + to_scale(20) > Decimal256::prec(38), so we **CANNOT** skip check overflow. to_prec = 38; to_scale = 20; - ASSERT_FALSE(FunctionTiDBCast::canSkipCheckOverflowForDecimal(int64_ptr, to_prec, to_scale)); + ASSERT_FALSE(FunctionTiDBCast<>::canSkipCheckOverflowForDecimal(int64_ptr, to_prec, to_scale)); // from_prec(19) + to_scale(20) > Int128::real_prec(39) - 1, so CastInternalType should be **Int256**. ASSERT_COLUMN_EQ( From 1e64c5d95c617676b99dcc0055253fdc56139e7a Mon Sep 17 00:00:00 2001 From: SeaRise Date: Tue, 24 May 2022 14:34:46 +0800 Subject: [PATCH 7/7] fix clang tidy err for `ExpressionAnalyzer`, `Join`, `StorageJoin`, `DAGQueryBlockInterpreter` (#4961) ref pingcap/tiflash#4605 --- .../Coprocessor/DAGQueryBlockInterpreter.cpp | 3 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 120 +++++++++--------- dbms/src/Interpreters/Join.cpp | 10 +- dbms/src/Storages/StorageJoin.cpp | 6 +- 4 files changed, 67 insertions(+), 72 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 8d91b8b23e9..7dfe0ebd871 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include @@ -91,7 +90,7 @@ struct AnalysisResult Names aggregation_keys; TiDB::TiDBCollators aggregation_collators; AggregateDescriptions aggregate_descriptions; - bool is_final_agg; + bool is_final_agg = false; }; AnalysisResult analyzeExpressions( diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index eb07e2d541e..cd947d08953 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -247,16 +247,16 @@ void ExpressionAnalyzer::translateQualifiedNames() if (!select_query || !select_query->tables || select_query->tables->children.empty()) return; - ASTTablesInSelectQueryElement & element = static_cast(*select_query->tables->children[0]); + auto & element = static_cast(*select_query->tables->children[0]); if (!element.table_expression) /// This is ARRAY JOIN without a table at the left side. return; - ASTTableExpression & table_expression = static_cast(*element.table_expression); + auto & table_expression = static_cast(*element.table_expression); if (table_expression.database_and_table_name) { - const ASTIdentifier & identifier = static_cast(*table_expression.database_and_table_name); + const auto & identifier = static_cast(*table_expression.database_and_table_name); alias = identifier.tryGetAlias(); @@ -291,7 +291,7 @@ void ExpressionAnalyzer::translateQualifiedNames() void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String & database_name, const String & table_name, const String & alias) { - if (ASTIdentifier * ident = typeid_cast(ast.get())) + if (auto * ident = typeid_cast(ast.get())) { if (ident->kind == ASTIdentifier::Column) { @@ -352,7 +352,7 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String if (ast->children.size() != 1) throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); - ASTIdentifier * ident = typeid_cast(ast->children[0].get()); + auto * ident = typeid_cast(ast->children[0].get()); if (!ident) throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR); @@ -396,7 +396,7 @@ void ExpressionAnalyzer::optimizeIfWithConstantCondition() bool ExpressionAnalyzer::tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const { /// numeric constant in condition - if (const ASTLiteral * literal = typeid_cast(condition.get())) + if (const auto * literal = typeid_cast(condition.get())) { if (literal->value.getType() == Field::Types::Int64 || literal->value.getType() == Field::Types::UInt64) { @@ -406,14 +406,14 @@ bool ExpressionAnalyzer::tryExtractConstValueFromCondition(const ASTPtr & condit } /// cast of numeric constant in condition to UInt8 - if (const ASTFunction * function = typeid_cast(condition.get())) + if (const auto * function = typeid_cast(condition.get())) { if (function->name == "CAST") { - if (ASTExpressionList * expr_list = typeid_cast(function->arguments.get())) + if (auto * expr_list = typeid_cast(function->arguments.get())) { const ASTPtr & type_ast = expr_list->children.at(1); - if (const ASTLiteral * type_literal = typeid_cast(type_ast.get())) + if (const auto * type_literal = typeid_cast(type_ast.get())) { if (type_literal->value.getType() == Field::Types::String && type_literal->value.get() == "UInt8") return tryExtractConstValueFromCondition(expr_list->children.at(0), value); @@ -432,7 +432,7 @@ void ExpressionAnalyzer::optimizeIfWithConstantConditionImpl(ASTPtr & current_as for (ASTPtr & child : current_ast->children) { - ASTFunction * function_node = typeid_cast(child.get()); + auto * function_node = typeid_cast(child.get()); if (!function_node || function_node->name != "if") { optimizeIfWithConstantConditionImpl(child, aliases); @@ -440,7 +440,7 @@ void ExpressionAnalyzer::optimizeIfWithConstantConditionImpl(ASTPtr & current_as } optimizeIfWithConstantConditionImpl(function_node->arguments, aliases); - ASTExpressionList * args = typeid_cast(function_node->arguments.get()); + auto * args = typeid_cast(function_node->arguments.get()); ASTPtr condition_expr = args->children.at(0); ASTPtr then_expr = args->children.at(1); @@ -603,13 +603,13 @@ void ExpressionAnalyzer::initGlobalSubqueries(ASTPtr & ast) /// Bottom-up actions. - if (ASTFunction * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { /// For GLOBAL IN. if (do_global && (node->name == "globalIn" || node->name == "globalNotIn")) addExternalStorage(node->arguments->children.at(1)); } - else if (ASTTablesInSelectQueryElement * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { /// For GLOBAL JOIN. if (do_global && node->table_join @@ -628,7 +628,7 @@ void ExpressionAnalyzer::findExternalTables(ASTPtr & ast) /// If table type identifier StoragePtr external_storage; - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) if (node->kind == ASTIdentifier::Table) if ((external_storage = context.tryGetExternalTable(node->name))) external_tables[node->name] = external_storage; @@ -658,8 +658,8 @@ static std::shared_ptr interpretSubquery( const Names & required_source_columns) { /// Subquery or table name. The name of the table is similar to the subquery `SELECT * FROM t`. - const ASTSubquery * subquery = typeid_cast(subquery_or_table_name.get()); - const ASTIdentifier * table = typeid_cast(subquery_or_table_name.get()); + const auto * subquery = typeid_cast(subquery_or_table_name.get()); + const auto * table = typeid_cast(subquery_or_table_name.get()); if (!subquery && !table) throw Exception("IN/JOIN supports only SELECT subqueries.", ErrorCodes::BAD_ARGUMENTS); @@ -721,9 +721,9 @@ static std::shared_ptr interpretSubquery( std::set all_column_names; std::set assigned_column_names; - if (ASTSelectWithUnionQuery * select_with_union = typeid_cast(query.get())) + if (auto * select_with_union = typeid_cast(query.get())) { - if (ASTSelectQuery * select = typeid_cast(select_with_union->list_of_selects->children.at(0).get())) + if (auto * select = typeid_cast(select_with_union->list_of_selects->children.at(0).get())) { for (auto & expr : select->select_expression_list->children) all_column_names.insert(expr->getAliasOrColumnName()); @@ -973,7 +973,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( { /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. if (functionIsInOrGlobalInOperator(func_node->name)) - if (ASTIdentifier * right = typeid_cast(func_node->arguments->children.at(1).get())) + if (auto * right = typeid_cast(func_node->arguments->children.at(1).get())) if (!aliases.count(right->name)) right->kind = ASTIdentifier::Table; @@ -1030,7 +1030,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } } } - else if (ASTExpressionList * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { // Get hidden column names of mutable storage OrderedNameSet filtered_names; @@ -1068,14 +1068,14 @@ void ExpressionAnalyzer::normalizeTreeImpl( } } } - else if (ASTTablesInSelectQueryElement * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { if (node->table_expression) { auto & database_and_table_name = static_cast(*node->table_expression).database_and_table_name; if (database_and_table_name) { - if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) + if (auto * right = typeid_cast(database_and_table_name.get())) { right->kind = ASTIdentifier::Table; } @@ -1127,7 +1127,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression. - if (ASTSelectQuery * select = typeid_cast(ast.get())) + if (auto * select = typeid_cast(ast.get())) { if (select->prewhere_expression) normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1); @@ -1211,7 +1211,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) * The request is sent to remote servers with already substituted constants. */ - if (ASTSubquery * subquery = typeid_cast(ast.get())) + if (auto * subquery = typeid_cast(ast.get())) { Context subquery_context = context; Settings subquery_settings = context.getSettings(); @@ -1283,7 +1283,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) /** Don't descend into subqueries in arguments of IN operator. * But if an argument is not subquery, than deeper may be scalar subqueries and we need to descend in them. */ - ASTFunction * func = typeid_cast(ast.get()); + auto * func = typeid_cast(ast.get()); if (func && functionIsInOrGlobalInOperator(func->name)) { @@ -1424,7 +1424,7 @@ void ExpressionAnalyzer::optimizeOrderBy() for (const auto & elem : elems) { String name = elem->children.front()->getColumnName(); - const ASTOrderByElement & order_by_elem = typeid_cast(*elem); + const auto & order_by_elem = typeid_cast(*elem); if (elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) unique_elems.emplace_back(elem); @@ -1496,14 +1496,14 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & continue; /// Don't dive into lambda functions - const ASTFunction * func = typeid_cast(child.get()); + const auto * func = typeid_cast(child.get()); if (func && func->name == "lambda") continue; makeSetsForIndexImpl(child, sample_block); } - const ASTFunction * func = typeid_cast(node.get()); + const auto * func = typeid_cast(node.get()); if (func && functionIsInOperator(func->name)) { const IAST & args = *func->arguments; @@ -1551,7 +1551,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ return; /// If the subquery or table name for SELECT. - const ASTIdentifier * identifier = typeid_cast(arg.get()); + const auto * identifier = typeid_cast(arg.get()); if (typeid_cast(arg.get()) || identifier) { /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. @@ -1566,7 +1566,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ if (table) { - StorageSet * storage_set = dynamic_cast(table.get()); + auto * storage_set = dynamic_cast(table.get()); if (storage_set) { @@ -1650,7 +1650,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & DataTypes set_element_types; const ASTPtr & left_arg = args.children.at(0); - const ASTFunction * left_arg_tuple = typeid_cast(left_arg.get()); + const auto * left_arg_tuple = typeid_cast(left_arg.get()); /** NOTE If tuple in left hand side specified non-explicitly * Example: identity((a, b)) IN ((1, 2), (3, 4)) @@ -1672,7 +1672,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & bool single_value = false; ASTPtr elements_ast = arg; - if (ASTFunction * set_func = typeid_cast(arg.get())) + if (auto * set_func = typeid_cast(arg.get())) { if (set_func->name == "tuple") { @@ -1684,7 +1684,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & else { /// Distinguish the case `(x, y) in ((1, 2), (3, 4))` from the case `(x, y) in (1, 2)`. - ASTFunction * any_element = typeid_cast(set_func->arguments->children.at(0).get()); + auto * any_element = typeid_cast(set_func->arguments->children.at(0).get()); if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) single_value = true; else @@ -1902,7 +1902,7 @@ void ExpressionAnalyzer::getArrayJoinedColumnsImpl(const ASTPtr & ast) if (typeid_cast(ast.get())) return; - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { if (node->kind == ASTIdentifier::Column) { @@ -1955,7 +1955,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, && actions_stack.getSampleBlock().has(ast->getColumnName())) return; - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { std::string name = node->getColumnName(); if (!only_consts && !actions_stack.getSampleBlock().has(name)) @@ -1973,7 +1973,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, ErrorCodes::NOT_AN_AGGREGATE); } } - else if (ASTFunction * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { if (node->name == "lambda") throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -2049,14 +2049,14 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, for (auto & child : node->arguments->children) { - ASTFunction * lambda = typeid_cast(child.get()); + auto * lambda = typeid_cast(child.get()); if (lambda && lambda->name == "lambda") { /// If the argument is a lambda expression, just remember its approximate type. if (lambda->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); + auto * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); @@ -2126,17 +2126,17 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, { ASTPtr child = node->arguments->children[i]; - ASTFunction * lambda = typeid_cast(child.get()); + auto * lambda = typeid_cast(child.get()); if (lambda && lambda->name == "lambda") { - const DataTypeFunction * lambda_type = typeid_cast(argument_types[i].get()); - ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); + const auto * lambda_type = typeid_cast(argument_types[i].get()); + auto * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); ASTs lambda_arg_asts = lambda_args_tuple->arguments->children; NamesAndTypesList lambda_arguments; for (size_t j = 0; j < lambda_arg_asts.size(); ++j) { - ASTIdentifier * identifier = typeid_cast(lambda_arg_asts[j].get()); + auto * identifier = typeid_cast(lambda_arg_asts[j].get()); if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); @@ -2192,7 +2192,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, if (arguments_present) actions_stack.addAction(ExpressionAction::applyFunction(function_builder, argument_names, node->getColumnName())); } - else if (ASTLiteral * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { DataTypePtr type = applyVisitor(FieldToDataType(), node->value); @@ -2232,7 +2232,7 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr return; } - const ASTFunction * node = typeid_cast(ast.get()); + const auto * node = typeid_cast(ast.get()); if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) { has_aggregation = true; @@ -2276,7 +2276,7 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr void ExpressionAnalyzer::assertNoAggregates(const ASTPtr & ast, const char * description) { - const ASTFunction * node = typeid_cast(ast.get()); + const auto * node = typeid_cast(ast.get()); if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) throw Exception("Aggregate function " + node->getColumnName() @@ -2365,9 +2365,9 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - const ASTTablesInSelectQueryElement & join_element = static_cast(*select_query->join()); - const ASTTableJoin & join_params = static_cast(*join_element.table_join); - const ASTTableExpression & table_to_join = static_cast(*join_element.table_expression); + const auto & join_element = static_cast(*select_query->join()); + const auto & join_params = static_cast(*join_element.table_join); + const auto & table_to_join = static_cast(*join_element.table_expression); if (join_params.using_expression_list) getRootActions(join_params.using_expression_list, only_types, false, step.actions); @@ -2386,7 +2386,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (table) { - StorageJoin * storage_join = dynamic_cast(table.get()); + auto * storage_join = dynamic_cast(table.get()); if (storage_join) { @@ -2544,7 +2544,7 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only ASTs asts = select_query->order_expression_list->children; for (const auto & i : asts) { - ASTOrderByElement * ast = typeid_cast(i.get()); + auto * ast = typeid_cast(i.get()); if (!ast || ast->children.empty()) throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); ASTPtr order_expression = ast->children.at(0); @@ -2598,7 +2598,7 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries) { - ASTFunction * node = typeid_cast(ast.get()); + auto * node = typeid_cast(ast.get()); if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) for (auto & argument : node->arguments->children) @@ -2714,7 +2714,7 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet required_joined_columns; getRequiredSourceColumnsImpl(ast, available_columns, required, ignored, available_joined_columns, required_joined_columns); - for (NamesAndTypesList::iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end();) + for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();) { if (required_joined_columns.count(it->name)) ++it; @@ -2737,7 +2737,7 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet unknown_required_source_columns = required; - for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();) + for (auto it = source_columns.begin(); it != source_columns.end();) { unknown_required_source_columns.erase(it->name); @@ -2777,8 +2777,8 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd if (!node) return; - const ASTTableJoin & table_join = static_cast(*node->table_join); - const ASTTableExpression & table_expression = static_cast(*node->table_expression); + const auto & table_join = static_cast(*node->table_join); + const auto & table_expression = static_cast(*node->table_expression); Block nested_result_sample; if (table_expression.database_and_table_name) @@ -2847,7 +2847,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, * - we put identifiers available from JOIN in required_joined_columns. */ - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { if (node->kind == ASTIdentifier::Column && !ignored_names.count(node->name) @@ -2863,14 +2863,14 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, return; } - if (ASTFunction * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { if (node->name == "lambda") { if (node->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - ASTFunction * lambda_args_tuple = typeid_cast(node->arguments->children.at(0).get()); + auto * lambda_args_tuple = typeid_cast(node->arguments->children.at(0).get()); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); @@ -2879,7 +2879,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, Names added_ignored; for (auto & child : lambda_args_tuple->arguments->children) { - ASTIdentifier * identifier = typeid_cast(child.get()); + auto * identifier = typeid_cast(child.get()); if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); @@ -2926,7 +2926,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, static bool hasArrayJoin(const ASTPtr & ast) { - if (const ASTFunction * function = typeid_cast(&*ast)) + if (const auto * function = typeid_cast(&*ast)) if (function->name == "arrayJoin") return true; diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index f1275d8e88e..ab37a1cb29b 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -26,10 +26,6 @@ #include #include #include -#include - -#include "executeQuery.h" - namespace DB { @@ -104,6 +100,7 @@ Join::Join( , key_names_right(key_names_right_) , use_nulls(use_nulls_) , build_concurrency(std::max(1, build_concurrency_)) + , build_set_exceeded(false) , collators(collators_) , left_filter_column(left_filter_column_) , right_filter_column(right_filter_column_) @@ -116,7 +113,6 @@ Join::Join( , log(Logger::get("Join", req_id)) , limits(limits) { - build_set_exceeded.store(false); for (size_t i = 0; i < build_concurrency; i++) pools.emplace_back(std::make_shared()); if (other_condition_ptr != nullptr) @@ -725,7 +721,7 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum column = column->convertToFullColumnIfConst(); if (column->isColumnNullable()) { - const ColumnNullable & column_nullable = static_cast(*column); + const auto & column_nullable = static_cast(*column); if (!null_map_holder) { null_map_holder = column_nullable.getNullMapColumnPtr(); @@ -2048,7 +2044,7 @@ class NonJoinedBlockInputStream : public IProfilingBlockInputStream MutableColumns columns_right; std::unique_ptr> position; /// type erasure - size_t current_segment; + size_t current_segment = 0; Join::RowRefList * current_not_mapped_row = nullptr; void setNextCurrentNotMappedRow() diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 47907b3e94e..4e3c01c6574 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -87,7 +87,7 @@ void registerStorageJoin(StorageFactory & factory) "Storage Join requires at least 3 parameters: Join(ANY|ALL, LEFT|INNER, keys...).", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const ASTIdentifier * strictness_id = typeid_cast(engine_args[0].get()); + const auto * strictness_id = typeid_cast(engine_args[0].get()); if (!strictness_id) throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS); @@ -100,7 +100,7 @@ void registerStorageJoin(StorageFactory & factory) else throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS); - const ASTIdentifier * kind_id = typeid_cast(engine_args[1].get()); + const auto * kind_id = typeid_cast(engine_args[1].get()); if (!kind_id) throw Exception("Second parameter of storage Join must be LEFT or INNER (without quotes).", ErrorCodes::BAD_ARGUMENTS); @@ -121,7 +121,7 @@ void registerStorageJoin(StorageFactory & factory) key_names.reserve(engine_args.size() - 2); for (size_t i = 2, size = engine_args.size(); i < size; ++i) { - const ASTIdentifier * key = typeid_cast(engine_args[i].get()); + const auto * key = typeid_cast(engine_args[i].get()); if (!key) throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS);