From 2707fb42513641dde53e556f931b4ce7f4e81d05 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 7 Dec 2023 00:22:21 +0800 Subject: [PATCH] util/ranger: don't exclude start key for range from `_` in `like` function | tidb-test=pr/2259 (#48984) (#49066) ref pingcap/tidb#48181, close pingcap/tidb#48983 --- .../testdata/plan_suite_out.json | 4 +-- pkg/util/ranger/points.go | 21 +++++++++++-- pkg/util/ranger/ranger_test.go | 2 +- .../core/issuetest/planner_issue.result | 30 +++++++++++++++++++ .../planner/core/issuetest/planner_issue.test | 12 ++++++++ 5 files changed, 63 insertions(+), 6 deletions(-) diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json index 899c6f8922c1d..947fc4659d41c 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json @@ -2195,7 +2195,7 @@ }, { "SQL": "select a from t where c_str like 'abc_'", - "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc%af'", @@ -2223,7 +2223,7 @@ }, { "SQL": "select a from t where c_str like 'abc\\__'", - "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 123", diff --git a/pkg/util/ranger/points.go b/pkg/util/ranger/points.go index 31063894d533d..7503aa4a4858e 100644 --- a/pkg/util/ranger/points.go +++ b/pkg/util/ranger/points.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/tidb/pkg/errno" "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/charset" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/sessionctx/stmtctx" "github.com/pingcap/tidb/pkg/types" @@ -678,9 +679,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po break } else if pattern[i] == '_' { // Get the prefix, but exclude the prefix. - // e.g., "abc_x", the start point exclude "abc", - // because the string length is more than 3. - exclude = true + // e.g., "abc_x", the start point excludes "abc" because the string length is more than 3. + // + // However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for + // PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish + // 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation, + // we will actually miss 'xxx ', which will cause wrong results. + if !isPadSpaceCollation(collation) { + exclude = true + } isExactMatch = false break } @@ -715,6 +722,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po return []*point{startPoint, endPoint} } +// isPadSpaceCollation returns whether the collation is a PAD SPACE collation. +// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple +// collation != binary check here. We may also move it to collation related packages when NO PAD collations are +// implemented in the future. +func isPadSpaceCollation(collation string) bool { + return collation != charset.CollationBin +} + func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point { switch n := expr.FuncName.L; n { case ast.IsTruthWithoutNull: diff --git a/pkg/util/ranger/ranger_test.go b/pkg/util/ranger/ranger_test.go index 71a4552ae491c..7635900770245 100644 --- a/pkg/util/ranger/ranger_test.go +++ b/pkg/util/ranger/ranger_test.go @@ -1106,7 +1106,7 @@ create table t( exprStr: "a LIKE 'abc_'", accessConds: "[like(test.t.a, abc_, 92)]", filterConds: "[like(test.t.a, abc_, 92)]", - resultStr: "[(\"abc\",\"abd\")]", + resultStr: "[[\"abc\",\"abd\")]", }, { indexPos: 0, diff --git a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result index 5d95672abacb7..c9e1ffcafc287 100644 --- a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result +++ b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result @@ -180,3 +180,33 @@ LEFT JOIN tmp3 c3 ON c3.id = '1'; id id 1 1 1 1 +drop table if exists t1, t2; +set autocommit=ON; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '),('xxx '); +explain format = brief select *,length(a) from t1 where a like '测试 %'; +id estRows task access object operator info +Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3 +└─IndexReader 250.00 root index:IndexRangeScan + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo +explain format = brief select *,length(a) from t1 where a like '测试'; +id estRows task access object operator info +Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3 +└─IndexReader 10.00 root index:IndexRangeScan + └─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo +select *,length(a) from t1 where a like '测试 %'; +a length(a) +测试 6 +测试 8 +select *,length(a) from t1 where a like '测试'; +a length(a) +测试 6 +测试 8 +explain format = brief select * from t1 use index (ia) where a like 'xxx_'; +id estRows task access object operator info +IndexReader 250.00 root index:Selection +└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92) + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo +select * from t1 use index (ia) where a like 'xxx_'; +a +xxx diff --git a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test index 1b58c7c5046c7..59b2ce7db70f6 100644 --- a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test +++ b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test @@ -136,3 +136,15 @@ FROM t2 db LEFT JOIN tmp3 c2 ON c2.id = '1' LEFT JOIN tmp3 c3 ON c3.id = '1'; + +# https://github.com/pingcap/tidb/issues/48983 +drop table if exists t1, t2; +set autocommit=ON; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '),('xxx '); +explain format = brief select *,length(a) from t1 where a like '测试 %'; +explain format = brief select *,length(a) from t1 where a like '测试'; +select *,length(a) from t1 where a like '测试 %'; +select *,length(a) from t1 where a like '测试'; +explain format = brief select * from t1 use index (ia) where a like 'xxx_'; +select * from t1 use index (ia) where a like 'xxx_';