diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index 0d4f5897772ad5c..dc32f632cff0d44 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -74,6 +74,7 @@ struct RowsetReaderContext { // for unique key merge on write bool enable_unique_key_merge_on_write = false; const DeleteBitmap* delete_bitmap = nullptr; + bool query_mow_in_mor = false; bool record_rowids = false; bool is_vertical_compaction = false; bool is_key_column_group = false; diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 9ab9e4b1b365f5d..98267c3f2da8d50 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -207,8 +207,12 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { } if (_tablet_schema->keys_type() == UNIQUE_KEYS && _tablet->enable_unique_key_merge_on_write()) { - // unique keys with merge on write, no need to merge sort keys in rowset - need_ordered_result = false; + if (read_params.query_mow_in_mor) { + need_ordered_result = true; + } else { + // unique keys with merge on write, no need to merge sort keys in rowset + need_ordered_result = false; + } } if (_aggregation) { // compute engine will aggregate rows with the same key, @@ -252,6 +256,7 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.is_unique = tablet()->keys_type() == UNIQUE_KEYS; _reader_context.merged_rows = &_merged_rows; _reader_context.delete_bitmap = read_params.delete_bitmap; + _reader_context.query_mow_in_mor = read_params.query_mow_in_mor; _reader_context.enable_unique_key_merge_on_write = tablet()->enable_unique_key_merge_on_write(); _reader_context.record_rowids = read_params.record_rowids; _reader_context.is_key_column_group = read_params.is_key_column_group; diff --git a/be/src/olap/tablet_reader.h b/be/src/olap/tablet_reader.h index 50517e047ba556e..4c18f17ad355b61 100644 --- a/be/src/olap/tablet_reader.h +++ b/be/src/olap/tablet_reader.h @@ -148,6 +148,7 @@ class TabletReader { std::vector rs_splits; // For unique key table with merge-on-write DeleteBitmap* delete_bitmap = nullptr; + bool query_mow_in_mor = false; // return_columns is init from query schema std::vector return_columns; diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index e7f2c18b09404a6..1b73fed709607f5 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -398,6 +398,10 @@ class RuntimeState { return _query_options.__isset.skip_delete_bitmap && _query_options.skip_delete_bitmap; } + bool query_mow_in_mor() const { + return _query_options.__isset.query_mow_in_mor && _query_options.query_mow_in_mor; + } + bool skip_missing_version() const { return _query_options.__isset.skip_missing_version && _query_options.skip_missing_version; } diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 99e9dcd52a9e7cb..13700f7b21e90ac 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -384,8 +384,11 @@ Status NewOlapScanner::_init_tablet_reader_params( _tablet_reader_params.use_page_cache = _state->enable_page_cache(); - if (tablet->enable_unique_key_merge_on_write() && !_state->skip_delete_bitmap()) { - _tablet_reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); + if (tablet->enable_unique_key_merge_on_write()) { + _tablet_reader_params.query_mow_in_mor = _state->query_mow_in_mor(); + if (!(_state->skip_delete_bitmap() || _state->query_mow_in_mor())) { + _tablet_reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); + } } if (!_state->skip_storage_engine_merge()) { diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index f7017a058df23d4..11b679f0ba692ec 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -71,7 +71,11 @@ void VCollectIterator::init(TabletReader* reader, bool ori_data_overlapping, boo (_reader->_direct_mode || _reader->_tablet->keys_type() == KeysType::DUP_KEYS || (_reader->_tablet->keys_type() == KeysType::UNIQUE_KEYS && _reader->_tablet->enable_unique_key_merge_on_write()))) { - _merge = false; + if (_reader->_reader_context.query_mow_in_mor) { + _merge = true; + } else { + _merge = false; + } } // When data is none overlapping, no need to build heap to traverse data diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 256d5401871e312..6276a86ae3adc28 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -375,6 +375,8 @@ public class SessionVariable implements Serializable, Writable { public static final String SKIP_DELETE_BITMAP = "skip_delete_bitmap"; + public static final String QUERY_MOW_IN_MOR = "query_mow_in_mor"; + public static final String SKIP_MISSING_VERSION = "skip_missing_version"; public static final String SKIP_BAD_TABLET = "skip_bad_tablet"; @@ -1441,6 +1443,12 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { @VariableMgr.VarAttr(name = SKIP_DELETE_BITMAP) public boolean skipDeleteBitmap = false; + /** + * Query mow table in mor way. + */ + @VariableMgr.VarAttr(name = QUERY_MOW_IN_MOR) + public boolean queryMowInMor = false; + // This variable replace the original FE config `recover_with_skip_missing_version`. // In some scenarios, all replicas of tablet are having missing versions, and the tablet is unable to recover. // This config can control the behavior of query. When it is set to `true`, the query will ignore the @@ -3716,9 +3724,10 @@ public TQueryOptions toThrift() { tResult.setSkipDeletePredicate(skipDeletePredicate); tResult.setSkipDeleteBitmap(skipDeleteBitmap); + tResult.setQueryMowInMor(queryMowInMor); if (ConnectContext.get() != null && ConnectContext.get().isTxnModel()) { // TODO set to true only if the sub txn ids are not empty - tResult.setSkipDeleteBitmap(true); + tResult.setQueryMowInMor(true); } tResult.setPartitionedHashJoinRowsThreshold(partitionedHashJoinRowsThreshold); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index f812676365f3e43..56fcd28b48b8afc 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -348,6 +348,8 @@ struct TQueryOptions { 135: optional bool enable_parallel_outfile = false; + 136: optional bool query_mow_in_mor = false + // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. diff --git a/regression-test/data/insert_p0/transaction/sub_txn_mow.out b/regression-test/data/insert_p0/transaction/sub_txn_mow.out index eaae31ae33fa8cb..359cbd5058f5b5a 100644 --- a/regression-test/data/insert_p0/transaction/sub_txn_mow.out +++ b/regression-test/data/insert_p0/transaction/sub_txn_mow.out @@ -1,4 +1,22 @@ -- This file is automatically generated. You should know what you did if you want to edit this +-- !select_1 -- + +-- !select_2 -- +1 a 1 +2 b 2 + +-- !select_3 -- +1 a 1 +2 b 2 + +-- !select_4 -- +1 a 1 +2 b 2 + +-- !select_5 -- +1 a 1 +2 b 2 + -- !select_61 -- 1 a 1 2 b 2 @@ -11,6 +29,24 @@ 1 a 1 2 b 2 +-- !select_1 -- + +-- !select_2 -- +1 a 1 +2 b 2 + +-- !select_3 -- +1 a 1 +2 b 2 + +-- !select_4 -- +1 a 1 +2 b 2 + +-- !select_5 -- +1 a 1 +2 b 2 + -- !select_61 -- 1 a 1 2 b 2 @@ -23,6 +59,34 @@ 1 a 1 2 b 2 +-- !par_1 -- + +-- !par_2 -- + +-- !par_3 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + +-- !par_4 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + +-- !par_5 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + +-- !par_6 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + -- !par_71 -- 1 a 1 10 a 10 @@ -41,6 +105,34 @@ 2 b 2 20 b 2 +-- !par_1 -- + +-- !par_2 -- + +-- !par_3 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + +-- !par_4 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + +-- !par_5 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + +-- !par_6 -- +1 a 1 +10 a 10 +2 b 2 +20 b 2 + -- !par_71 -- 1 a 1 10 a 10 @@ -59,6 +151,28 @@ 2 b 2 20 b 2 +-- !del_1 -- +1 a 1 + +-- !del_2 -- +1 a 1 +2 b 2 + +-- !del_3 -- +1 a 1 +2 b 2 + +-- !del_4 -- +1 a 101 +2 b 2 + +-- !del_5 -- +2 b 2 + +-- !del_6 -- +1 a 1 +2 b 2 + -- !del_71 -- 1 a 1 2 b 2 @@ -70,6 +184,28 @@ -- !del_73 -- 2 b 2 +-- !del_1 -- +1 a 1 + +-- !del_2 -- +1 a 1 +2 b 2 + +-- !del_3 -- +1 a 1 +2 b 2 + +-- !del_4 -- +1 a 101 +2 b 2 + +-- !del_5 -- +2 b 2 + +-- !del_6 -- +1 a 1 +2 b 2 + -- !del_71 -- 1 a 1 2 b 2 diff --git a/regression-test/suites/insert_p0/transaction/sub_txn_mow.groovy b/regression-test/suites/insert_p0/transaction/sub_txn_mow.groovy index 9a11a5cb85637a1..ca816f56702441a 100644 --- a/regression-test/suites/insert_p0/transaction/sub_txn_mow.groovy +++ b/regression-test/suites/insert_p0/transaction/sub_txn_mow.groovy @@ -51,19 +51,19 @@ suite("sub_txn_mow") { } sql """ insert into ${prefix}_3 select * from ${prefix}_2; """ - // order_qt_select_1 """ select * from ${prefix}_3; """ + order_qt_select_1 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_3 select * from ${prefix}_1; """ - // order_qt_select_2 """ select * from ${prefix}_3; """ + order_qt_select_2 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_2 select * from ${prefix}_3; """ - // order_qt_select_3 """ select * from ${prefix}_2; """ + order_qt_select_3 """ select * from ${prefix}_2; """ sql """ insert into ${prefix}_1 select * from ${prefix}_2; """ - // order_qt_select_4 """ select * from ${prefix}_1; """ + order_qt_select_4 """ select * from ${prefix}_1; """ sql """ insert into ${prefix}_2 select * from ${prefix}_1; """ - // order_qt_select_5 """ select * from ${prefix}_2; """ + order_qt_select_5 """ select * from ${prefix}_2; """ if (prefix == table_txn) { sql """ commit; """ @@ -108,22 +108,22 @@ suite("sub_txn_mow") { } sql """ insert into ${prefix}_3 PARTITION(p_1_11) select * from ${prefix}_2; """ - // order_qt_par_1 """ select * from ${prefix}_3; """ + order_qt_par_1 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_2 PARTITION(p_1_11) select * from ${prefix}_3; """ - // order_qt_par_2 """ select * from ${prefix}_2; """ + order_qt_par_2 """ select * from ${prefix}_2; """ sql """ insert into ${prefix}_3 select * from ${prefix}_1; """ - // order_qt_par_3 """ select * from ${prefix}_3; """ + order_qt_par_3 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_2 select * from ${prefix}_3; """ - // order_qt_par_4 """ select * from ${prefix}_2; """ + order_qt_par_4 """ select * from ${prefix}_2; """ sql """ insert into ${prefix}_3 PARTITION(p_1_11) select * from ${prefix}_2; """ - // order_qt_par_5 """ select * from ${prefix}_3; """ + order_qt_par_5 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_2 PARTITION(p_11_21) select * from ${prefix}_3; """ - // order_qt_par_6 """ select * from ${prefix}_2; """ + order_qt_par_6 """ select * from ${prefix}_2; """ if (prefix == table_txn) { sql """ commit; """ @@ -143,22 +143,22 @@ suite("sub_txn_mow") { } sql """ delete from ${prefix}_3 where id > 1; """ - // order_qt_del_1 """ select * from ${prefix}_3; """ + order_qt_del_1 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_2 select * from ${prefix}_3; """ - // order_qt_del_2 """ select * from ${prefix}_2; """ + order_qt_del_2 """ select * from ${prefix}_2; """ sql """ insert into ${prefix}_3 select * from ${prefix}_2; """ - // order_qt_del_3 """ select * from ${prefix}_3; """ + order_qt_del_3 """ select * from ${prefix}_3; """ sql """ update ${prefix}_3 set score = score + 100 where id = 1; """ - // order_qt_del_4 """ select * from ${prefix}_3; """ + order_qt_del_4 """ select * from ${prefix}_3; """ sql """ delete from ${prefix}_3 where id < 2; """ - // order_qt_del_5 """ select * from ${prefix}_3; """ + order_qt_del_5 """ select * from ${prefix}_3; """ sql """ insert into ${prefix}_2 select * from ${prefix}_3; """ - // order_qt_del_6 """ select * from ${prefix}_2; """ + order_qt_del_6 """ select * from ${prefix}_2; """ if (prefix == table_txn) { sql """ commit; """