Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support collation for planner refactor test framework #5449

Merged
merged 10 commits into from
Jul 26, 2022
97 changes: 97 additions & 0 deletions dbms/src/Flash/tests/gtest_collation.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <TestUtils/ExecutorTestUtils.h>
#include <TestUtils/mockExecutor.h>

namespace DB
{
namespace tests
{

class ExecutorCollation : public DB::tests::ExecutorTest
{
public:
using ColStringNullableType = std::optional<typename TypeTraits<String>::FieldType>;
using ColUInt64Type = typename TypeTraits<UInt64>::FieldType;

using ColumnWithNullableString = std::vector<ColStringNullableType>;
using ColumnWithUInt64 = std::vector<ColUInt64Type>;

void initializeContext() override
{
ExecutorTest::initializeContext();

context.addMockTable({db_name, table_name},
{{col_name, TiDB::TP::TypeString}},
{toNullableVec<String>(col_name, col)});

context.addMockTable({db_name, chinese_table},
{{chinese_col_name, TiDB::TP::TypeString}},
{toNullableVec<String>(chinese_col_name, chinese_col)});
}

/// Prepare some names
const String db_name{"test_db"};
const String table_name{"collation_table"};
const String col_name{"col"};
const ColumnWithNullableString col{"china", "china", "china ", "CHINA", "cHiNa ", "usa", "usa", "usa ", "USA", "USA "};

const String chinese_table{"chinese"};
const String chinese_col_name{"col"};
const ColumnWithNullableString chinese_col{"北京", "北京 ", "北bei京", "北Bei京", "北bei京 ", "上海", "上海 ", "shanghai ", "ShangHai", "ShangHai "};
};

/// Guarantee that test framework has correctly supported the collation.
/// Guarantee that the test framework correctly supports collation.
///
/// Every section builds "scan -> aggregation (group by the string column) -> project"
/// requests and compares the resulting groups with the expected column.
/// `context.setCollation` is called before the requests of a section are built,
/// so each section runs under the collation named in its comment.
/// NOTE(review): ASSERT_COLUMNS_EQ_UR is presumably an order-insensitive
/// comparison ("unordered rows") — confirm against ExecutorTestUtils.h.
TEST_F(ExecutorCollation, Verification)
try
{
    std::shared_ptr<tipb::DAGRequest> request;
    {
        /// Test default collation(utf8mb4_bin)
        // No setCollation call here: the context's default collation is used.
        // The expectation holds one value per group; e.g. "china" and "china "
        // are expected to end up in the same group, while "CHINA" stays separate.
        request = context.scan(db_name, table_name).aggregation(MockAstVec{}, {col(col_name)}).project({col_name}).build(context);
        ASSERT_COLUMNS_EQ_UR(ColumnsWithTypeAndName{toNullableVec<String>(col_name, ColumnWithNullableString{"usa", "CHINA", "USA", "china", "cHiNa "})}, executeStreams(request, 1));

        request = context.scan(db_name, chinese_table).aggregation(MockAstVec{}, {col(chinese_col_name)}).project({chinese_col_name}).build(context);
        ASSERT_COLUMNS_EQ_UR(ColumnsWithTypeAndName{toNullableVec<String>(chinese_col_name, ColumnWithNullableString{"ShangHai", "北京", "北Bei京", "shanghai ", "北bei京", "上海"})}, executeStreams(request, 1));
    }

    {
        /// Test utf8_general_ci
        // Values differing only in case are expected to share a group here.
        context.setCollation(TiDB::ITiDBCollator::UTF8_GENERAL_CI);
        request = context.scan(db_name, table_name).aggregation(MockAstVec{}, {col(col_name)}).project({col_name}).build(context);
        ASSERT_COLUMNS_EQ_UR(ColumnsWithTypeAndName{toNullableVec<String>(col_name, ColumnWithNullableString{"usa", "china"})}, executeStreams(request, 1));

        request = context.scan(db_name, chinese_table).aggregation(MockAstVec{}, {col(chinese_col_name)}).project({chinese_col_name}).build(context);
        ASSERT_COLUMNS_EQ_UR(ColumnsWithTypeAndName{toNullableVec<String>(chinese_col_name, ColumnWithNullableString{"北京", "shanghai ", "北bei京", "上海"})}, executeStreams(request, 1));
    }

    {
        /// Test utf8_bin
        context.setCollation(TiDB::ITiDBCollator::UTF8_BIN);
        request = context.scan(db_name, table_name).aggregation(MockAstVec{}, {col(col_name)}).project({col_name}).build(context);
        ASSERT_COLUMNS_EQ_UR(ColumnsWithTypeAndName{toNullableVec<String>(col_name, ColumnWithNullableString{"USA", "CHINA", "usa", "china", "cHiNa "})}, executeStreams(request, 1));
    }

    {
        /// Test utf8_unicode_ci
        context.setCollation(TiDB::ITiDBCollator::UTF8_UNICODE_CI);
        request = context.scan(db_name, table_name).aggregation(MockAstVec{}, {col(col_name)}).project({col_name}).build(context);
        ASSERT_COLUMNS_EQ_UR(ColumnsWithTypeAndName{toNullableVec<String>(col_name, ColumnWithNullableString{"china", "usa"})}, executeStreams(request, 1));
    }
}
CATCH

} // namespace tests
} // namespace DB
4 changes: 2 additions & 2 deletions dbms/src/TestUtils/mockExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ void MockDAGRequestContext::addExchangeReceiver(const String & name, MockColumnI

DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name)
{
auto builder = DAGRequestBuilder(index).mockTable({db_name, table_name}, mock_tables[db_name + "." + table_name]);
auto builder = DAGRequestBuilder(index, collation).mockTable({db_name, table_name}, mock_tables[db_name + "." + table_name]);
// If there are no related columns, the user must pass input columns as an argument of executeStreams in order to run Executor Tests.
// If the user doesn't want to test executors, it is safe to run Interpreter Tests.
if (mock_table_columns.find(db_name + "." + table_name) != mock_table_columns.end())
Expand All @@ -399,7 +399,7 @@ DAGRequestBuilder MockDAGRequestContext::scan(String db_name, String table_name)

DAGRequestBuilder MockDAGRequestContext::receive(String exchange_name, uint64_t fine_grained_shuffle_stream_count)
{
auto builder = DAGRequestBuilder(index).exchangeReceiver(exchange_schemas[exchange_name], fine_grained_shuffle_stream_count);
auto builder = DAGRequestBuilder(index, collation).exchangeReceiver(exchange_schemas[exchange_name], fine_grained_shuffle_stream_count);
receiver_source_task_ids_map[builder.getRoot()->name] = {};
// If there are no related columns, the user must pass input columns as an argument of executeStreams in order to run Executor Tests.
// If the user doesn't want to test executors, it is safe to run Interpreter Tests.
Expand Down
14 changes: 12 additions & 2 deletions dbms/src/TestUtils/mockExecutor.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <Debug/dbgFuncCoprocessor.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTFunction.h>
#include <Storages/Transaction/Collator.h>
#include <tipb/executor.pb.h>

namespace DB::tests
Expand Down Expand Up @@ -52,9 +53,10 @@ class DAGRequestBuilder
return executor_index;
}

explicit DAGRequestBuilder(size_t & index)
explicit DAGRequestBuilder(size_t & index, Int32 collator = TiDB::ITiDBCollator::UTF8MB4_BIN)
: executor_index(index)
{
properties.collator = -abs(collator);
Comment on lines +61 to +64
Copy link
Contributor

@Willendless Willendless Jul 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we use -abs(collator) instead of just collator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we use -abs(collator) instead of just collator?

The collator passed by TiDB is negative, so we need to guarantee that the collator is < 0. See https://github.com/pingcap/tiflash/blob/master/dbms/src/Flash/Coprocessor/DAGUtils.cpp#L1357

}

ExecutorPtr getRoot()
Expand Down Expand Up @@ -101,6 +103,9 @@ class DAGRequestBuilder
DAGRequestBuilder & sort(MockOrderByItem order_by, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0);
DAGRequestBuilder & sort(MockOrderByItemVec order_by_vec, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0);

/// Store the collation id on the request properties.
/// The sign of the argument is ignored: the stored value is always the negated
/// absolute value, i.e. never positive.
void setCollation(Int32 collator_)
{
    const auto magnitude = abs(collator_);
    properties.collator = -magnitude;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can wrap -abs(collator_) in a function:

/// add some comments
Int32 inline getXXXCollation(Int32 collator_)
{
    return -abs(collator_);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can wrap -abs(collator_) in a function:

/// add some comments
Int32 inline getXXXCollation(Int32 collator_)
{
    return -abs(collator_);
}

ok

/// Return the absolute value of the collation id stored on the request properties.
Int32 getCollation() const
{
    const auto magnitude = abs(properties.collator);
    return magnitude;
}

private:
void initDAGRequest(tipb::DAGRequest & dag_request);
DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs);
Expand All @@ -117,8 +122,9 @@ class DAGRequestBuilder
class MockDAGRequestContext
{
public:
explicit MockDAGRequestContext(Context context_)
explicit MockDAGRequestContext(Context context_, Int32 collation_ = TiDB::ITiDBCollator::UTF8MB4_BIN)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the default parameter not "no collation"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the default parameter not "no collation"?

Because TiDB recently enabled collation by default and sets it to UTF8MB4_BIN.

: context(context_)
, collation(-abs(collation_))
{
index = 0;
}
Expand All @@ -143,6 +149,9 @@ class MockDAGRequestContext
DAGRequestBuilder scan(String db_name, String table_name);
DAGRequestBuilder receive(String exchange_name, uint64_t fine_grained_shuffle_stream_count = 0);

/// Remember the collation id for this context.
/// The sign of the argument is ignored: the stored value is always the negated
/// absolute value, i.e. never positive.
void setCollation(Int32 collation_)
{
    const auto magnitude = abs(collation_);
    collation = -magnitude;
}
/// Return the absolute value of the collation id stored on this context.
Int32 getCollation() const
{
    const auto magnitude = abs(collation);
    return magnitude;
}

private:
size_t index;
std::unordered_map<String, MockColumnInfoVec> mock_tables;
Expand All @@ -157,6 +166,7 @@ class MockDAGRequestContext
// In TiFlash, we use task_id to identify an Mpp Task.
std::unordered_map<String, std::vector<Int64>> receiver_source_task_ids_map;
Context context;
Int32 collation;
};

ASTPtr buildColumn(const String & column_name);
Expand Down