Skip to content

Commit

Permalink
util/ranger: don't exclude start key for range from _ in like fun…
Browse files Browse the repository at this point in the history
…ction | tidb-test=pr/2259 (#48984) (#49066)

ref #48181, close #48983
  • Loading branch information
ti-chi-bot authored Dec 6, 2023
1 parent 51c38df commit 2707fb4
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2195,7 +2195,7 @@
},
{
"SQL": "select a from t where c_str like 'abc_'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 'abc%af'",
Expand Down Expand Up @@ -2223,7 +2223,7 @@
},
{
"SQL": "select a from t where c_str like 'abc\\__'",
"Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
"Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection"
},
{
"SQL": "select a from t where c_str like 123",
Expand Down
21 changes: 18 additions & 3 deletions pkg/util/ranger/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/pingcap/tidb/pkg/errno"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
"github.com/pingcap/tidb/pkg/types"
Expand Down Expand Up @@ -678,9 +679,15 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
break
} else if pattern[i] == '_' {
// Get the prefix, but exclude the prefix.
// e.g., "abc_x", the start point exclude "abc",
// because the string length is more than 3.
exclude = true
// e.g., "abc_x", the start point excludes "abc" because the string length is more than 3.
//
// However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for
// PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish
// 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation,
// we will actually miss 'xxx ', which will cause wrong results.
if !isPadSpaceCollation(collation) {
exclude = true
}
isExactMatch = false
break
}
Expand Down Expand Up @@ -715,6 +722,14 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po
return []*point{startPoint, endPoint}
}

// isPadSpaceCollation returns whether the collation is a PAD SPACE collation.
// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple
// collation != binary check here. We may also move it to collation related packages when NO PAD collations are
// implemented in the future.
func isPadSpaceCollation(collation string) bool {
return collation != charset.CollationBin
}

func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point {
switch n := expr.FuncName.L; n {
case ast.IsTruthWithoutNull:
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/ranger/ranger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ create table t(
exprStr: "a LIKE 'abc_'",
accessConds: "[like(test.t.a, abc_, 92)]",
filterConds: "[like(test.t.a, abc_, 92)]",
resultStr: "[(\"abc\",\"abd\")]",
resultStr: "[[\"abc\",\"abd\")]",
},
{
indexPos: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,33 @@ LEFT JOIN tmp3 c3 ON c3.id = '1';
id id
1 1
1 1
drop table if exists t1, t2;
set autocommit=ON;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
id estRows task access object operator info
Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─IndexReader 250.00 root index:IndexRangeScan
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo
explain format = brief select *,length(a) from t1 where a like '测试';
id estRows task access object operator info
Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3
└─IndexReader 10.00 root index:IndexRangeScan
└─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo
select *,length(a) from t1 where a like '测试 %';
a length(a)
测试 6
测试 8
select *,length(a) from t1 where a like '测试';
a length(a)
测试 6
测试 8
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
id estRows task access object operator info
IndexReader 250.00 root index:Selection
└─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "xxx_", 92)
└─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo
select * from t1 use index (ia) where a like 'xxx_';
a
xxx
12 changes: 12 additions & 0 deletions tests/integrationtest/t/planner/core/issuetest/planner_issue.test
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,15 @@ FROM
t2 db
LEFT JOIN tmp3 c2 ON c2.id = '1'
LEFT JOIN tmp3 c3 ON c3.id = '1';

# https://github.com/pingcap/tidb/issues/48983
drop table if exists t1, t2;
set autocommit=ON;
create table t1(a varchar(20) collate utf8mb4_bin, index ia(a));
insert into t1 value('测试'),('测试 '),('xxx ');
explain format = brief select *,length(a) from t1 where a like '测试 %';
explain format = brief select *,length(a) from t1 where a like '测试';
select *,length(a) from t1 where a like '测试 %';
select *,length(a) from t1 where a like '测试';
explain format = brief select * from t1 use index (ia) where a like 'xxx_';
select * from t1 use index (ia) where a like 'xxx_';

0 comments on commit 2707fb4

Please sign in to comment.