Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: do not use like to build range when new collation is enabled #31278

Merged
merged 21 commits into from
Jan 21, 2022
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8707462
planner: do not use like to build range when new collation enable
Reminiscent Jan 4, 2022
90cd9a3
Merge branch 'master' of github.com:pingcap/tidb into issue#31022
Reminiscent Jan 4, 2022
c03b389
fix ut
Reminiscent Jan 5, 2022
3129b20
Merge branch 'master' of github.com:pingcap/tidb into issue#31022
Reminiscent Jan 5, 2022
745c503
Merge branch 'master' into issue#31022
xhebox Jan 11, 2022
c1bc6c4
Merge branch 'master' into issue#31022
bb7133 Jan 14, 2022
799ffb4
address comments
Reminiscent Jan 17, 2022
3c797b8
update the test results
Reminiscent Jan 17, 2022
f715ca7
Merge branch 'master' of github.com:pingcap/tidb into issue#31022
Reminiscent Jan 17, 2022
87102e5
Merge remote-tracking branch 'origin/issue#31022' into issue#31022
Reminiscent Jan 17, 2022
5514ca5
update the test results
Reminiscent Jan 18, 2022
b43fa12
Merge branch 'master' of github.com:pingcap/tidb into issue#31022
Reminiscent Jan 18, 2022
f8b54a8
address comments
Reminiscent Jan 19, 2022
64e3c84
Merge branch 'master' of github.com:pingcap/tidb into issue#31022
Reminiscent Jan 19, 2022
6c9ae01
Merge branch 'master' into issue#31022
bb7133 Jan 20, 2022
8dc0a1f
Merge branch 'master' into issue#31022
hawkingrei Jan 21, 2022
0709b79
Merge branch 'master' into issue#31022
ti-chi-bot Jan 21, 2022
8082b0d
Merge branch 'master' into issue#31022
ti-chi-bot Jan 21, 2022
547f9b5
Merge branch 'master' into issue#31022
ti-chi-bot Jan 21, 2022
cacb9cf
Merge branch 'master' into issue#31022
ti-chi-bot Jan 21, 2022
fd30ef2
Merge branch 'master' into issue#31022
ti-chi-bot Jan 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set names utf8mb4;
use test;
drop table if exists t;
create table t(a int, b real, c bigint as ((a+1)) virtual, e real as ((b+a)));
Expand Down
2 changes: 1 addition & 1 deletion cmd/explaintest/r/index_merge.result
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ drop view if exists v2;
create view v2 as select /*+ use_index_merge(t1) */ * from t1 where c1 < 10 or c2 < 10 and c3 < 10;
show create view v2;
View Create View character_set_client collation_connection
v2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `v2` (`c1`, `c2`, `c3`) AS SELECT /*+ USE_INDEX_MERGE(`t1` )*/ `test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2`,`test`.`t1`.`c3` AS `c3` FROM `test`.`t1` WHERE `c1`<10 OR `c2`<10 AND `c3`<10 utf8mb4 utf8mb4_general_ci
v2 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`%` SQL SECURITY DEFINER VIEW `v2` (`c1`, `c2`, `c3`) AS SELECT /*+ USE_INDEX_MERGE(`t1` )*/ `test`.`t1`.`c1` AS `c1`,`test`.`t1`.`c2` AS `c2`,`test`.`t1`.`c3` AS `c3` FROM `test`.`t1` WHERE `c1`<10 OR `c2`<10 AND `c3`<10 utf8mb4 utf8mb4_bin
select * from v2 order by 1;
c1 c2 c3
1 1 1
Expand Down
1 change: 1 addition & 0 deletions cmd/explaintest/t/explain_generate_column_substitute.test
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set names utf8mb4;
use test;
drop table if exists t;
create table t(a int, b real, c bigint as ((a+1)) virtual, e real as ((b+a)));
Expand Down
28 changes: 28 additions & 0 deletions expression/integration_serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,34 @@ func TestIssue17891(t *testing.T) {
tk.MustExec("create table test(id int, value set ('a','b','c') charset utf8mb4 collate utf8mb4_general_ci default 'a,B ,C');")
}

// TestIssue31174 checks, with the new collation framework enabled, how a
// `like` predicate on a clustered primary key is planned: a full scan plus
// Selection for a utf8_general_ci column, and a range scan for a binary column.
func TestIssue31174(t *testing.T) {
// Enable the new collation framework for this test and switch it off again on return.
collate.SetNewCollationEnabledForTest(true)
defer collate.SetNewCollationEnabledForTest(false)

store, clean := testkit.CreateMockStore(t)
defer clean()

tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
// Case 1: the primary key uses utf8_general_ci, a non-binary collation.
tk.MustExec("create table t(a char(4) collate utf8_general_ci primary key /*T![clustered_index] clustered */);")
tk.MustExec("insert into t values('`?');")
// The 'like' condition cannot be used to construct the range, so the expected
// plan is a TableFullScan with the predicate evaluated in a Selection.
tk.MustQuery("explain format = 'brief' select * from t where a like '`%';").Check(testkit.Rows(""+
"TableReader 8000.00 root data:Selection",
"└─Selection 8000.00 cop[tikv] like(test.t.a, \"`%\", 92)",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo"))
tangenta marked this conversation as resolved.
Show resolved Hide resolved
// The inserted row must still be returned by the query.
tk.MustQuery("select * from t where a like '`%';").Check(testkit.Rows("`?"))

// Case 2: the primary key uses the binary collation; the expected plan is a
// TableRangeScan over [0x60,0x61).
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a char(4) collate binary primary key /*T![clustered_index] clustered */);")
tk.MustExec("insert into t values('`?');")
tk.MustQuery("explain format = 'brief' select * from t where a like '`%';").Check(testkit.Rows(""+
"TableReader 250.00 root data:TableRangeScan",
"└─TableRangeScan 250.00 cop[tikv] table:t range:[0x60,0x61), keep order:false, stats:pseudo"))
// The expected row carries two trailing \x00 bytes after the inserted value.
tk.MustQuery("select * from t where a like '`%';").Check(testkit.Rows("`?\x00\x00"))
Reminiscent marked this conversation as resolved.
Show resolved Hide resolved
}

func TestIssue20268(t *testing.T) {
collate.SetNewCollationEnabledForTest(true)
defer collate.SetNewCollationEnabledForTest(false)
Expand Down
7 changes: 5 additions & 2 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,13 @@ func IsCICollation(collate string) bool {
collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci"
}

// IsBinCollation returns if the collation is 'xx_bin'.
// IsBinCollation returns if the collation is 'xx_bin' or 'bin'.
// It is used to determine whether the sort key of a char type value under the collation is equal to the value itself;
// both the 'xx_bin' collations and charset.CollationBin satisfy this.
func IsBinCollation(collate string) bool {
return collate == charset.CollationASCII || collate == charset.CollationLatin1 ||
collate == charset.CollationUTF8 || collate == charset.CollationUTF8MB4
collate == charset.CollationUTF8 || collate == charset.CollationUTF8MB4 ||
collate == charset.CollationBin
}

// CollationToProto converts collation from string to int32(used by protocol).
Expand Down
10 changes: 10 additions & 0 deletions util/ranger/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction

func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) bool {
_, collation := scalar.CharsetAndCollation()
if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) {
tangenta marked this conversation as resolved.
Show resolved Hide resolved
// The algorithm constructs the range at the byte level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte.
// However, this is incorrect for strings with a non-binary collation, because the sort key order is not the same as the byte order.
// For example, "`%" is mapped to the range [`, a] (where ` is 0x60 and a is 0x61).
// Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key.
// As a result, the range effectively becomes [`, A], which is actually an empty range.
// See https://github.com/pingcap/tidb/issues/31174 for more details.
// In short, when the column is a string with a non-binary collation, we cannot use `like` expressions to generate the range.
return false
}
if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().Collate, collation) {
return false
}
Expand Down