Skip to content

Commit

Permalink
planner: do not use like to build range when new collation is enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
Reminiscent authored Jan 21, 2022
1 parent 622b858 commit 4ae9687
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set names utf8mb4;
use test;
drop table if exists t;
create table t(a int, b real, c bigint as ((a+1)) virtual, e real as ((b+a)));
Expand Down
2 changes: 1 addition & 1 deletion cmd/explaintest/r/index_merge.result
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ drop view if exists v2;
create view v2 as select /*+ use_index_merge(t1) */ * from t1 where c1 < 10 or c2 < 10 and c3 < 10;
show create view v2;
View Create View character_set_client collation_connection
v2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `v2` (`c1`, `c2`, `c3`) AS SELECT /*+ USE_INDEX_MERGE(`t1` )*/ `test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2`,`test`.`t1`.`c3` AS `c3` FROM `test`.`t1` WHERE `c1`<10 OR `c2`<10 AND `c3`<10 utf8mb4 utf8mb4_general_ci
v2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `v2` (`c1`, `c2`, `c3`) AS SELECT /*+ USE_INDEX_MERGE(`t1` )*/ `test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2`,`test`.`t1`.`c3` AS `c3` FROM `test`.`t1` WHERE `c1`<10 OR `c2`<10 AND `c3`<10 utf8mb4 utf8mb4_bin
select * from v2 order by 1;
c1 c2 c3
1 1 1
Expand Down
1 change: 1 addition & 0 deletions cmd/explaintest/t/explain_generate_column_substitute.test
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set names utf8mb4;
use test;
drop table if exists t;
create table t(a int, b real, c bigint as ((a+1)) virtual, e real as ((b+a)));
Expand Down
23 changes: 23 additions & 0 deletions expression/integration_serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,29 @@ func TestIssue17891(t *testing.T) {
tk.MustExec("create table test(id int, value set ('a','b','c') charset utf8mb4 collate utf8mb4_general_ci default 'a,B ,C');")
}

// TestIssue31174 checks how a `like` predicate on a clustered primary key is
// planned and evaluated while the new collation framework is enabled.
//
// For a utf8_general_ci column the plan is expected to contain TableFullScan
// (the `like` condition must not be turned into a range); for a binary column
// it is expected to contain TableRangeScan. In both cases the query must still
// return the inserted row.
func TestIssue31174(t *testing.T) {
	// Enable the new collation framework for this test only.
	collate.SetNewCollationEnabledForTest(true)
	defer collate.SetNewCollationEnabledForTest(false)

	store, clean := testkit.CreateMockStore(t)
	defer clean()

	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(a char(4) collate utf8_general_ci primary key /*T![clustered_index] clustered */);")
	tk.MustExec("insert into t values('`?');")
	// The 'like' condition can not be used to construct the range.
	// HasPlan only reports whether the plan contains the operator; it does not
	// fail the test by itself, so the result has to be checked explicitly.
	if !tk.HasPlan("select * from t where a like '`%';", "TableFullScan") {
		t.Fatal("expected TableFullScan for like on a utf8_general_ci column")
	}
	tk.MustQuery("select * from t where a like '`%';").Check(testkit.Rows("`?"))

	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(a char(4) collate binary primary key /*T![clustered_index] clustered */);")
	tk.MustExec("insert into t values('`?');")
	// With the binary collation the 'like' condition can still build a range.
	if !tk.HasPlan("select * from t where a like '`%';", "TableRangeScan") {
		t.Fatal("expected TableRangeScan for like on a binary column")
	}
	tk.MustQuery("select * from t where a like '`%';").Check(testkit.Rows("`?\x00\x00"))
}

func TestIssue20268(t *testing.T) {
collate.SetNewCollationEnabledForTest(true)
defer collate.SetNewCollationEnabledForTest(false)
Expand Down
7 changes: 5 additions & 2 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,13 @@ func IsCICollation(collate string) bool {
collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci"
}

// IsBinCollation returns if the collation is 'xx_bin' or 'bin'.
// The function is to determine whether the sortkey of a char type of data under the collation is equal to the data itself,
// and both xx_bin and collationBin are satisfied.
//
// It returns true when collate equals one of charset.CollationASCII,
// charset.CollationLatin1, charset.CollationUTF8, charset.CollationUTF8MB4 or
// charset.CollationBin, and false for any other value.
func IsBinCollation(collate string) bool {
	return collate == charset.CollationASCII || collate == charset.CollationLatin1 ||
		collate == charset.CollationUTF8 || collate == charset.CollationUTF8MB4 ||
		collate == charset.CollationBin
}

// CollationToProto converts collation from string to int32(used by protocol).
Expand Down
10 changes: 10 additions & 0 deletions util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool {
_, collation := scalar.CharsetAndCollation()
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
// The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
// However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order.
// For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61).
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
// Finally, the range comes to be [`, A], which is actually an empty range.
// See https://github.com/pingcap/tidb/issues/31174 for more details.
// In short, when the column is a string type with a non-binary collation, we cannot use `like` expressions to generate the range.
return false
}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().Collate, collation) {
return false
}
Expand Down

0 comments on commit 4ae9687

Please sign in to comment.