diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index b737fada05f0e..dd232322bcee8 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -797,14 +797,14 @@ create table t(a binary(16) not null, b varchar(2) default null, c varchar(100) explain format = 'brief' select * from t where a=x'FA34E1093CB428485734E3917F000000' and b='xb'; id estRows task access object operator info IndexLookUp 0.10 root -├─IndexRangeScan(Build) 0.10 cop[tikv] table:t, index:a(a, b) range:[0xFA34E1093CB428485734E3917F000000 "xb",0xFA34E1093CB428485734E3917F000000 "xb"], keep order:false, stats:pseudo +├─IndexRangeScan(Build) 0.10 cop[tikv] table:t, index:a(a, b) range:["\xfa4\xe1\t<\xb4(HW4\xe3\x91\x7f\x00\x00\x00" "xb","\xfa4\xe1\t<\xb4(HW4\xe3\x91\x7f\x00\x00\x00" "xb"], keep order:false, stats:pseudo └─TableRowIDScan(Probe) 0.10 cop[tikv] table:t keep order:false, stats:pseudo explain format = 'brief' update t set c = 'ssss' where a=x'FA34E1093CB428485734E3917F000000' and b='xb'; id estRows task access object operator info Update N/A root N/A └─SelectLock 0.10 root for update 0 └─IndexLookUp 0.10 root - ├─IndexRangeScan(Build) 0.10 cop[tikv] table:t, index:a(a, b) range:[0xFA34E1093CB428485734E3917F000000 "xb",0xFA34E1093CB428485734E3917F000000 "xb"], keep order:false, stats:pseudo + ├─IndexRangeScan(Build) 0.10 cop[tikv] table:t, index:a(a, b) range:["\xfa4\xe1\t<\xb4(HW4\xe3\x91\x7f\x00\x00\x00" "xb","\xfa4\xe1\t<\xb4(HW4\xe3\x91\x7f\x00\x00\x00" "xb"], keep order:false, stats:pseudo └─TableRowIDScan(Probe) 0.10 cop[tikv] table:t keep order:false, stats:pseudo drop table if exists t; create table t(a int, b int); diff --git a/cmd/explaintest/r/explain_generate_column_substitute.result b/cmd/explaintest/r/explain_generate_column_substitute.result index 9afb09538c3b5..db9b32f805e97 100644 --- a/cmd/explaintest/r/explain_generate_column_substitute.result +++ 
b/cmd/explaintest/r/explain_generate_column_substitute.result @@ -424,7 +424,8 @@ id estRows task access object operator info StreamAgg 1.00 root funcs:count(Column#6)->Column#4 └─IndexReader 1.00 root index:StreamAgg └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#6 - └─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo + └─Selection 250.00 cop[tikv] like(md5(cast(test.tbl1.s, var_string(20))), "02e74f10e0327ad868d138f2b4fdd6f%", 92) + └─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo select count(*) from tbl1 use index() where md5(s) like '02e74f10e0327ad868d138f2b4fdd6f%'; count(*) 64 diff --git a/cmd/explaintest/r/range_scan_for_like.result b/cmd/explaintest/r/range_scan_for_like.result new file mode 100644 index 0000000000000..eb6d9002fb91a --- /dev/null +++ b/cmd/explaintest/r/range_scan_for_like.result @@ -0,0 +1,1085 @@ +drop table if exists t1, t2; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '),('xxx '); +explain format = brief select *,length(a) from t1 use index (ia) where a like '测试 %'; +id estRows task access object operator info +Projection 250.00 root test.t1.a, length(test.t1.a)->Column#3 +└─IndexReader 250.00 root index:Selection + └─Selection 250.00 cop[tikv] like(test.t1.a, "测试 %", 92) + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试!"), keep order:false, stats:pseudo +explain format = brief select *,length(a) from t1 use index (ia) where a like '测试'; +id estRows task access object operator info +Projection 10.00 root test.t1.a, length(test.t1.a)->Column#3 +└─IndexReader 10.00 root index:Selection + └─Selection 10.00 cop[tikv] like(test.t1.a, "测试", 92) + └─IndexRangeScan 10.00 cop[tikv] table:t1, 
index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo +select *,length(a) from t1 use index (ia) where a like '测试 %'; +a length(a) +测试 8 +select *,length(a) from t1 use index (ia) where a like '测试'; +a length(a) +测试 6 +explain format = brief select * from t1 use index (ia) where a like 'xxx_'; +id estRows task access object operator info +IndexReader 250.00 root index:Selection +└─Selection 250.00 cop[tikv] like(test.t1.a, "xxx_", 92) + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["xxx","xxy"), keep order:false, stats:pseudo +select * from t1 use index (ia) where a like 'xxx_'; +a +xxx +create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a)); +insert into t2 value('测试'),('测试 '); +explain format = brief select *,length(a) from t2 use index (ia) where a like '测试 %'; +id estRows task access object operator info +Projection 250.00 root test.t2.a, length(to_binary(test.t2.a))->Column#3 +└─IndexReader 250.00 root index:Selection + └─Selection 250.00 cop[tikv] like(test.t2.a, "测试 %", 92) + └─IndexRangeScan 250.00 cop[tikv] table:t2, index:ia(a) range:["\x89\a\xba%","\x89\a\xba%!"), keep order:false, stats:pseudo +explain format = brief select *,length(a) from t2 use index (ia) where a like '测试'; +id estRows task access object operator info +Projection 10.00 root test.t2.a, length(to_binary(test.t2.a))->Column#3 +└─IndexReader 10.00 root index:Selection + └─Selection 10.00 cop[tikv] like(test.t2.a, "测试", 92) + └─IndexRangeScan 10.00 cop[tikv] table:t2, index:ia(a) range:["\x89\a\xba%","\x89\a\xba%"], keep order:false, stats:pseudo +select *,length(a) from t2 use index (ia) where a like '测试 %'; +a length(a) +测试 6 +select *,length(a) from t2 use index (ia) where a like '测试'; +a length(a) +测试 4 +drop table if exists t; +create table t(a varchar(20) collate utf8mb4_general_ci, index ia(a)); +insert into t value('测试'),('测试Abc'),('测试 '),('你好'),('aABBccdd'),('Aa'),(''),(' '),(' '),(' 语言'),(' 语 言 '),('测测试 '),('测测试 '),(NULL); +explain select *, 
length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "测试%", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["mK\x8b\xd5","mK\x8b\xd6"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +a length(a) +测试 6 +测试 11 +测试Abc 9 +explain select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "测%%", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["mK","mL"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +a length(a) +测测试 10 +测测试 13 +测试 6 +测试 11 +测试Abc 9 +explain select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "测%%试", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["mK","mL"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +a length(a) +测试 6 +explain select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root 
test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "测试%%", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["mK\x8b\xd5","mK\x8b\xd6"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +a length(a) +测试 6 +测试 11 +测试Abc 9 +explain select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "测试_", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["mK\x8b\xd5","mK\x8b\xd6"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +a length(a) +explain select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "你好%", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["O`Y}","O`Y~"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +a length(a) +你好 6 +explain select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, Column#3 +└─Projection_13 10.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 10.00 root index:Selection_15 + └─Selection_15 
10.00 cop[tikv] like(test.t.a, "aa", 92) + └─IndexRangeScan_14 10.00 cop[tikv] table:t, index:ia(a) range:["\x00A\x00A","\x00A\x00A"], keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +a length(a) +Aa 2 +explain select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "aa%", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00A\x00A","\x00A\x00B"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +a length(a) +Aa 2 +aABBccdd 8 +explain select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "aa%cc", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00A\x00A","\x00A\x00B"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +a length(a) +explain select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, Column#3 +└─Projection_13 10.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 10.00 root index:Selection_15 + └─Selection_15 10.00 cop[tikv] like(test.t.a, "", 92) + └─IndexRangeScan_14 10.00 cop[tikv] table:t, index:ia(a) range:["",""], keep order:true, stats:pseudo +select *, length(a) 
from t use index (ia) where a like '' order by a,_tidb_rowid; +a length(a) + 0 +explain select *, length(a) from t use index (ia) where a like ' ' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, Column#3 +└─Projection_13 10.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 10.00 root index:Selection_15 + └─Selection_15 10.00 cop[tikv] like(test.t.a, " ", 92) + └─IndexRangeScan_14 10.00 cop[tikv] table:t, index:ia(a) range:["",""], keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like ' ' order by a,_tidb_rowid; +a length(a) + 1 +explain select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "aa%dd", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00A\x00A","\x00A\x00B"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +a length(a) +aABBccdd 8 +explain select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "aa%%dd", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00A\x00A","\x00A\x00B"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +a length(a) +aABBccdd 8 +explain select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by 
a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, "aa_bccdd", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00A\x00A","\x00A\x00B"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +a length(a) +aABBccdd 8 +explain select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 8000.00 root test.t.a, Column#3 +└─Projection_13 8000.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 8000.00 root index:Selection_15 + └─Selection_15 8000.00 cop[tikv] like(test.t.a, "%%", 92) + └─IndexFullScan_14 10000.00 cop[tikv] table:t, index:ia(a) keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +a length(a) + 0 + 1 + 2 + 语 言 10 + 语言 7 +Aa 2 +aABBccdd 8 +你好 6 +测测试 10 +测测试 13 +测试 6 +测试 11 +测试Abc 9 +explain select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, " %%", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["","\x00!"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +a length(a) + 1 + 2 + 语 言 10 + 语言 7 +explain select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, 
Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, " %%语言", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["","\x00!"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +a length(a) + 语言 7 +explain select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, " 语 %", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00 \x00 \x8b\xed","\x00 \x00 \x8b\xed\x00!"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +a length(a) + 语 言 10 +explain select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Projection_13 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_16 250.00 root index:Selection_15 + └─Selection_15 250.00 cop[tikv] like(test.t.a, " 语 _", 92) + └─IndexRangeScan_14 250.00 cop[tikv] table:t, index:ia(a) range:["\x00 \x00 \x8b\xed","\x00 \x00 \x8b\xed\x00 \x00!"), keep order:true, stats:pseudo +select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +a length(a) +drop table t; +create table t(a varchar(20) collate utf8mb4_unicode_ci, unique index ia(a)); +insert into t value(''),('测试'),('测试abc'),('你好'),('aabbccdd'),(' 语言'),(' 语 言 '),('测测试 '); +explain select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +id estRows task access 
object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "测试%", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\xfb@\xedK\xfbA\x8b\xd5","\xfb@\xedK\xfbA\x8b\xd6"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +a length(a) +测试 6 +测试abc 9 +explain select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "测%%", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\xfb@\xedK","\xfb@\xedL"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +a length(a) +测测试 13 +测试 6 +测试abc 9 +explain select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "测%%试", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\xfb@\xedK","\xfb@\xedL"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +a length(a) +测试 6 +explain select *, length(a) from t use index (ia) where a like '测试%%' order 
by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "测试%%", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\xfb@\xedK\xfbA\x8b\xd5","\xfb@\xedK\xfbA\x8b\xd6"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +a length(a) +测试 6 +测试abc 9 +explain select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "测试_", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\xfb@\xedK\xfbA\x8b\xd5","\xfb@\xedK\xfbA\x8b\xd6"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +a length(a) +explain select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "你好%", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\xfb@\xcf`\xfb@\xd9}","\xfb@\xcf`\xfb@\xd9~"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +a length(a) +你好 6 +explain 
select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 1.00 root test.t.a, Column#3 +└─Sort_7 1.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 1.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─Selection_11 1.00 root like(test.t.a, "aa", 92) + └─Point_Get_10 1.00 root table:t, index:ia(a) +select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +a length(a) +explain select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "aa%", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x0e3\x0e3","\x0e3\x0e4"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +a length(a) +aabbccdd 8 +explain select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "aa%cc", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x0e3\x0e3","\x0e3\x0e4"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +a length(a) +explain select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 1.00 root 
test.t.a, Column#3 +└─Sort_7 1.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 1.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─Selection_11 1.00 root like(test.t.a, "", 92) + └─Point_Get_10 1.00 root table:t, index:ia(a) +select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +a length(a) + 0 +explain select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "aa%dd", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x0e3\x0e3","\x0e3\x0e4"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +a length(a) +aabbccdd 8 +explain select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "aa%%dd", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x0e3\x0e3","\x0e3\x0e4"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +a length(a) +aabbccdd 8 +explain select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, 
length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, "aa_bccdd", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x0e3\x0e3","\x0e3\x0e4"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +a length(a) +aabbccdd 8 +explain select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 8000.00 root test.t.a, Column#3 +└─Sort_7 8000.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 8000.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 8000.00 root index:Selection_11 + └─Selection_11 8000.00 cop[tikv] like(test.t.a, "%%", 92) + └─IndexFullScan_10 10000.00 cop[tikv] table:t, index:ia(a) keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +a length(a) + 0 + 语 言 10 + 语言 7 +aabbccdd 8 +你好 6 +测测试 13 +测试 6 +测试abc 9 +explain select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, " %%", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["","\x02\n"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +a length(a) + 语 言 10 + 语言 7 +explain select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + 
└─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, " %%语言", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["","\x02\n"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +a length(a) + 语言 7 +explain select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, " 语 %", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x02\t\x02\t\xfbA\x8b\xed","\x02\t\x02\t\xfbA\x8b\xed\x02\n"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +a length(a) + 语 言 10 +explain select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, Column#3 +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─Projection_9 250.00 root test.t.a, length(test.t.a)->Column#3, test.t._tidb_rowid + └─IndexReader_12 250.00 root index:Selection_11 + └─Selection_11 250.00 cop[tikv] like(test.t.a, " 语 _", 92) + └─IndexRangeScan_10 250.00 cop[tikv] table:t, index:ia(a) range:["\x02\t\x02\t\xfbA\x8b\xed","\x02\t\x02\t\xfbA\x8b\xed\x02\t\x02\n"), keep order:false, stats:pseudo +select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +a length(a) +drop table t; +create table t(a varchar(20) collate utf8mb4_general_ci, b varchar(20) collate ascii_bin, c bigint, primary key(a(1), b) clustered); +insert into t (a, b, c) 
values +('测试1', 'asdfgh', 345346), +('你好2', 'qqwweerrrr', 987765), +('こんにちは3', 'zxcvbnn', 1111111), +('안녕하세요4', 'asdfgh ', 3333333333), +('Ciao5', ' asdfgh', 444400), +('Hola6', ' asdfgh ', 6666), +('Bonjour ', '', 888888888), +('Olá8', ' ', 9999999), +('Привет9', ' ', 321321), +('Hallo10', '12345', 35678); +explain select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─TableReader_10 6.25 root data:Selection_9 + └─Selection_9 6.25 cop[tikv] like(test.t.a, "测试%", 92), like(test.t.b, "asd%", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["mK","mL"), keep order:false, stats:pseudo +select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +a b c +测试1 asdfgh 345346 +explain select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +id estRows task access object operator info +Sort_5 0.25 root test.t.a, test.t.b +└─TableReader_10 0.25 root data:Selection_9 + └─Selection_9 0.25 cop[tikv] like(test.t.a, "测试1", 92), like(test.t.b, "asdfgh %", 92) + └─TableRangeScan_8 10.00 cop[tikv] table:t range:["mK","mK"], keep order:false, stats:pseudo +select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +a b c +explain select * from t use index (primary) where a like 'こんにち_' and b like 'zxc%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─TableReader_10 6.25 root data:Selection_9 + └─Selection_9 6.25 cop[tikv] like(test.t.a, "こんにち_", 92), like(test.t.b, "zxc%", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["0S","0T"), keep order:false, stats:pseudo +select * from t use index (primary) where a like 'こんにち_' and b like 'zxc%' order by a,b; +a b c +explain select * from t use index (primary) where a like '안녕하세요%' and b like 'asd%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 
root test.t.a, test.t.b +└─TableReader_10 6.25 root data:Selection_9 + └─Selection_9 6.25 cop[tikv] like(test.t.a, "안녕하세요%", 92), like(test.t.b, "asd%", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["\xc5H","\xc5I"), keep order:false, stats:pseudo +select * from t use index (primary) where a like '안녕하세요%' and b like 'asd%' order by a,b; +a b c +안녕하세요4 asdfgh 3333333333 +explain select * from t use index (primary) where a like 'Ciáo%' and b like ' _%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─TableReader_10 6.25 root data:Selection_9 + └─Selection_9 6.25 cop[tikv] like(test.t.a, "Ciáo%", 92), like(test.t.b, " _%", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["\x00C","\x00D"), keep order:false, stats:pseudo +select * from t use index (primary) where a like 'Ciáo%' and b like ' _%' order by a,b; +a b c +Ciao5 asdfgh 444400 +explain select * from t use index (primary) where a like '%HoLa%' and b like ' asdfgh' order by a,b; +id estRows task access object operator info +Sort_5 8.00 root test.t.a, test.t.b +└─TableReader_10 8.00 root data:Selection_9 + └─Selection_9 8.00 cop[tikv] like(test.t.a, "%HoLa%", 92), like(test.t.b, " asdfgh", 92) + └─TableFullScan_8 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like '%HoLa%' and b like ' asdfgh' order by a,b; +a b c +explain select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +id estRows task access object operator info +Sort_5 0.25 root test.t.a, test.t.b +└─TableReader_10 0.25 root data:Selection_9 + └─Selection_9 0.25 cop[tikv] like(test.t.a, "bonjour _%", 92), like(test.t.b, "", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["\x00B","\x00C"), keep order:false, stats:pseudo +select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +a b c +Bonjour 888888888 +explain select * from t use index (primary) where a 
like 'OLa%' and b like '_' order by a,b; +id estRows task access object operator info +Sort_5 200.00 root test.t.a, test.t.b +└─TableReader_10 200.00 root data:Selection_9 + └─Selection_9 200.00 cop[tikv] like(test.t.a, "OLa%", 92), like(test.t.b, "_", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["\x00O","\x00P"), keep order:false, stats:pseudo +select * from t use index (primary) where a like 'OLa%' and b like '_' order by a,b; +a b c +Olá8 9999999 +explain select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─TableReader_10 6.25 root data:Selection_9 + └─Selection_9 6.25 cop[tikv] like(test.t.a, "Приве__", 92), like(test.t.b, " %", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["\x04\x1f","\x04 "), keep order:false, stats:pseudo +select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +a b c +Привет9 321321 +explain select * from t use index (primary) where a like 'Hallo%' and b like '123%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─TableReader_10 6.25 root data:Selection_9 + └─Selection_9 6.25 cop[tikv] like(test.t.a, "Hallo%", 92), like(test.t.b, "123%", 92) + └─TableRangeScan_8 250.00 cop[tikv] table:t range:["\x00H","\x00I"), keep order:false, stats:pseudo +select * from t use index (primary) where a like 'Hallo%' and b like '123%' order by a,b; +a b c +Hallo10 12345 35678 +drop table t; +create table t(a varchar(20) collate gbk_chinese_ci, b varchar(20) collate latin1_bin, c bigint, primary key(a, b(5)) nonclustered); +insert into t (a, b, c) values +('测试1', 'asdfgh', 345346), +('你好2', 'qqwweerrrr', 987765), +('zxcvbnn',0xE38193E38293E381ABE381A1E381AF33, 1111111), +('asdfgh ', 0xEC9588EB8595ED9598EC84B8EC9A9434, 3333333333), +('Ciao5', ' asdfgh', 444400), +(' asdfgh ', 'Hola6', 6666), +('Bonjour ', '', 888888888), +('Olá8', ' ', 
9999999), +('Привет9', ' ', 321321), +(' ', '12345', 35678); +set names utf8mb4; +explain select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─IndexLookUp_12 6.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, "测试%", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["\x89\a\xba%","\x89\a\xba&"), keep order:false, stats:pseudo + └─Selection_11(Probe) 6.25 cop[tikv] like(test.t.b, "asd%", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +a b c +测试1 asdfgh 345346 +explain select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +id estRows task access object operator info +Sort_5 0.25 root test.t.a, test.t.b +└─IndexLookUp_12 0.25 root + ├─Selection_10(Build) 0.25 cop[tikv] like(test.t.a, "测试1", 92) + │ └─IndexRangeScan_8 10.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["\x89\a\xba%1","\x89\a\xba%1"], keep order:false, stats:pseudo + └─Selection_11(Probe) 0.25 cop[tikv] like(test.t.b, "asdfgh %", 92) + └─TableRowIDScan_9 0.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +a b c +set names latin1; +explain select * from t use index (primary) where b like 'こんにち_' and a like 'zxc%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─IndexLookUp_12 6.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, "zxc%", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["ZXC","ZXD"), keep order:false, stats:pseudo + └─Selection_11(Probe) 6.25 cop[tikv] like(test.t.b, "こんにち_", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where 
b like 'こんにち_' and a like 'zxc%' order by a,b; +a b c +explain select * from t use index (primary) where b like '안녕하세요%' and a like 'asd%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─IndexLookUp_12 6.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, "asd%", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["ASD","ASE"), keep order:false, stats:pseudo + └─Selection_11(Probe) 6.25 cop[tikv] like(test.t.b, "안녕하세요%", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where b like '안녕하세요%' and a like 'asd%' order by a,b; +a b c +asdfgh 안녕하세요4 3333333333 +set names utf8mb4; +explain select * from t use index (primary) where a like 'Ciao%' and b like ' _%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─IndexLookUp_12 6.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, "Ciao%", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["CIAO","CIAP"), keep order:false, stats:pseudo + └─Selection_11(Probe) 6.25 cop[tikv] like(test.t.b, " _%", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like 'Ciao%' and b like ' _%' order by a,b; +a b c +Ciao5 asdfgh 444400 +explain select * from t use index (primary) where b like 'HoLa%' and a like ' asdfgh' order by a,b; +id estRows task access object operator info +Sort_5 0.25 root test.t.a, test.t.b +└─IndexLookUp_12 0.25 root + ├─Selection_10(Build) 0.25 cop[tikv] like(test.t.a, " asdfgh", 92) + │ └─IndexRangeScan_8 10.00 cop[tikv] table:t, index:PRIMARY(a, b) range:[" ASDFGH"," ASDFGH"], keep order:false, stats:pseudo + └─Selection_11(Probe) 0.25 cop[tikv] like(test.t.b, "HoLa%", 92) + └─TableRowIDScan_9 0.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where b like 'HoLa%' 
and a like ' asdfgh' order by a,b; +a b c +explain select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +id estRows task access object operator info +Sort_5 0.25 root test.t.a, test.t.b +└─IndexLookUp_12 0.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, "bonjour _%", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["BONJOUR","BONJOUR!"), keep order:false, stats:pseudo + └─Selection_11(Probe) 0.25 cop[tikv] like(test.t.b, "", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +a b c +Bonjour 888888888 +explain select * from t use index (primary) where a like 'OLá' and b like '_' order by a,b; +id estRows task access object operator info +Sort_5 8.00 root test.t.a, test.t.b +└─IndexLookUp_12 8.00 root + ├─Selection_10(Build) 8.00 cop[tikv] like(test.t.a, "OLá", 92) + │ └─IndexRangeScan_8 10.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["OL\x82\xb2","OL\x82\xb2"], keep order:false, stats:pseudo + └─Selection_11(Probe) 8.00 cop[tikv] like(test.t.b, "_", 92) + └─TableRowIDScan_9 8.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like 'OLá' and b like '_' order by a,b; +a b c +explain select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─IndexLookUp_12 6.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, "Приве__", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["\x83b\x83e\x83U\x83G\x83M","\x83b\x83e\x83U\x83G\x83N"), keep order:false, stats:pseudo + └─Selection_11(Probe) 6.25 cop[tikv] like(test.t.b, " %", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like 'Приве__' and b like ' %' 
order by a,b; +a b c +Привет9 321321 +explain select * from t use index (primary) where a like ' %' and b like '123%' order by a,b; +id estRows task access object operator info +Sort_5 6.25 root test.t.a, test.t.b +└─IndexLookUp_12 6.25 root + ├─Selection_10(Build) 6.25 cop[tikv] like(test.t.a, " %", 92) + │ └─IndexRangeScan_8 250.00 cop[tikv] table:t, index:PRIMARY(a, b) range:["","!"), keep order:false, stats:pseudo + └─Selection_11(Probe) 6.25 cop[tikv] like(test.t.b, "123%", 92) + └─TableRowIDScan_9 6.25 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (primary) where a like ' %' and b like '123%' order by a,b; +a b c + 12345 35678 +drop table t; +create table t(a varchar(20) collate utf8mb4_general_ci, b bigint, index ia(a(3),b)); +insert into t value +('测试',222), +('测试Abc',324), +('测试 ',543), +('你好',111), +('aABBccdd',890), +('A',456), +('Aa',456), +('aab',456), +('aabB',456), +('',234), +(' ',11111), +(' ',66666), +(' 语言',55555), +(' 语 言',3579), +('测测试 ',2468), +('测测试 ',99999), +(NULL,10); +explain select * from t use index (ia) where a > 'aabb' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3333.33 root test.t.a, test.t.b +└─Sort_7 3333.33 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 3333.33 root + ├─IndexRangeScan_9(Build) 3333.33 cop[tikv] table:t, index:ia(a, b) range:["\x00A\x00A\x00B",+inf], keep order:false, stats:pseudo + └─Selection_11(Probe) 3333.33 cop[tikv] gt(test.t.a, "aabb") + └─TableRowIDScan_10 3333.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a > 'aabb' order by a,_tidb_rowid; +a b +aABBccdd 890 +你好 111 +测测试 2468 +测测试 99999 +测试 222 +测试 543 +测试Abc 324 +explain select * from t use index (ia) where a > 'aab' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3333.33 root test.t.a, test.t.b +└─Sort_7 3333.33 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 3333.33 root + 
├─IndexRangeScan_9(Build) 3333.33 cop[tikv] table:t, index:ia(a, b) range:["\x00A\x00A\x00B",+inf], keep order:false, stats:pseudo + └─Selection_11(Probe) 3333.33 cop[tikv] gt(test.t.a, "aab") + └─TableRowIDScan_10 3333.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a > 'aab' order by a,_tidb_rowid; +a b +aabB 456 +aABBccdd 890 +你好 111 +测测试 2468 +测测试 99999 +测试 222 +测试 543 +测试Abc 324 +explain select * from t use index (ia) where a > 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3333.33 root test.t.a, test.t.b +└─Sort_7 3333.33 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 3333.33 root + ├─IndexRangeScan_9(Build) 3333.33 cop[tikv] table:t, index:ia(a, b) range:("\x00A\x00A",+inf], keep order:false, stats:pseudo + └─Selection_11(Probe) 3333.33 cop[tikv] gt(test.t.a, "aa") + └─TableRowIDScan_10 3333.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a > 'aa' order by a,_tidb_rowid; +a b +aab 456 +aabB 456 +aABBccdd 890 +你好 111 +测测试 2468 +测测试 99999 +测试 222 +测试 543 +测试Abc 324 +explain select * from t use index (ia) where a < 'aabb' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3323.33 root test.t.a, test.t.b +└─Sort_7 3323.33 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 3323.33 root + ├─IndexRangeScan_9(Build) 3323.33 cop[tikv] table:t, index:ia(a, b) range:[-inf,"\x00A\x00A\x00B"], keep order:false, stats:pseudo + └─Selection_11(Probe) 3323.33 cop[tikv] lt(test.t.a, "aabb") + └─TableRowIDScan_10 3323.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a < 'aabb' order by a,_tidb_rowid; +a b + 234 + 11111 + 66666 + 语 言 3579 + 语言 55555 +A 456 +Aa 456 +aab 456 +explain select * from t use index (ia) where a < 'aab' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3323.33 root test.t.a, test.t.b +└─Sort_7 3323.33 root test.t.a, 
test.t._tidb_rowid + └─IndexLookUp_12 3323.33 root + ├─IndexRangeScan_9(Build) 3323.33 cop[tikv] table:t, index:ia(a, b) range:[-inf,"\x00A\x00A\x00B"), keep order:false, stats:pseudo + └─Selection_11(Probe) 3323.33 cop[tikv] lt(test.t.a, "aab") + └─TableRowIDScan_10 3323.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a < 'aab' order by a,_tidb_rowid; +a b + 234 + 11111 + 66666 + 语 言 3579 + 语言 55555 +A 456 +Aa 456 +explain select * from t use index (ia) where a < 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3323.33 root test.t.a, test.t.b +└─Sort_7 3323.33 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 3323.33 root + ├─IndexRangeScan_9(Build) 3323.33 cop[tikv] table:t, index:ia(a, b) range:[-inf,"\x00A\x00A"), keep order:false, stats:pseudo + └─Selection_11(Probe) 3323.33 cop[tikv] lt(test.t.a, "aa") + └─TableRowIDScan_10 3323.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a < 'aa' order by a,_tidb_rowid; +a b + 234 + 11111 + 66666 + 语 言 3579 + 语言 55555 +A 456 +explain select * from t use index (ia) where a != 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 6656.67 root test.t.a, test.t.b +└─Sort_7 6656.67 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 6656.67 root + ├─IndexFullScan_9(Build) 10000.00 cop[tikv] table:t, index:ia(a, b) keep order:false, stats:pseudo + └─Selection_11(Probe) 6656.67 cop[tikv] ne(test.t.a, "aa") + └─TableRowIDScan_10 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a != 'aa' order by a,_tidb_rowid; +a b + 234 + 11111 + 66666 + 语 言 3579 + 语言 55555 +A 456 +aab 456 +aabB 456 +aABBccdd 890 +你好 111 +测测试 2468 +测测试 99999 +测试 222 +测试 543 +测试Abc 324 +explain select * from t use index (ia) where a != 'aaBbc' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 6656.67 root test.t.a, test.t.b 
+└─Sort_7 6656.67 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 6656.67 root + ├─IndexFullScan_9(Build) 10000.00 cop[tikv] table:t, index:ia(a, b) keep order:false, stats:pseudo + └─Selection_11(Probe) 6656.67 cop[tikv] ne(test.t.a, "aaBbc") + └─TableRowIDScan_10 10000.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a != 'aaBbc' order by a,_tidb_rowid; +a b + 234 + 11111 + 66666 + 语 言 3579 + 语言 55555 +A 456 +Aa 456 +aab 456 +aabB 456 +aABBccdd 890 +你好 111 +测测试 2468 +测测试 99999 +测试 222 +测试 543 +测试Abc 324 +explain select * from t use index (ia) where a like '测试abc' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["mK\x8b\xd5\x00A","mK\x8b\xd5\x00A"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] like(test.t.a, "测试abc", 92) + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like '测试abc' order by a,_tidb_rowid; +a b +测试Abc 324 +explain select * from t use index (ia) where a = '测试abc' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["mK\x8b\xd5\x00A","mK\x8b\xd5\x00A"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] eq(test.t.a, "测试abc") + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a = '测试abc' order by a,_tidb_rowid; +a b +测试Abc 324 +explain select * from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 
10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["\x00A\x00A","\x00A\x00A"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] like(test.t.a, "aa", 92) + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +a b +Aa 456 +explain select * from t use index (ia) where a = 'aa' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["\x00A\x00A","\x00A\x00A"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] eq(test.t.a, "aa") + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a = 'aa' order by a,_tidb_rowid; +a b +Aa 456 +explain select * from t use index (ia) where a like '测测试 ' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["mKmK\x8b\xd5","mKmK\x8b\xd5"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] like(test.t.a, "测测试 ", 92) + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like '测测试 ' order by a,_tidb_rowid; +a b +测测试 2468 +explain select * from t use index (ia) where a = '测测试 ' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) 
range:["mKmK\x8b\xd5","mKmK\x8b\xd5"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] eq(test.t.a, "测测试 ") + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a = '测测试 ' order by a,_tidb_rowid; +a b +测测试 2468 +测测试 99999 +explain select * from t use index (ia) where a like ' 语 言' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["\x00 \x00 \x8b\xed","\x00 \x00 \x8b\xed"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] like(test.t.a, " 语 言", 92) + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like ' 语 言' order by a,_tidb_rowid; +a b + 语 言 3579 +explain select * from t use index (ia) where a = ' 语 言' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 10.00 root test.t.a, test.t.b +└─Sort_7 10.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 10.00 root + ├─IndexRangeScan_9(Build) 10.00 cop[tikv] table:t, index:ia(a, b) range:["\x00 \x00 \x8b\xed","\x00 \x00 \x8b\xed"], keep order:false, stats:pseudo + └─Selection_11(Probe) 10.00 cop[tikv] eq(test.t.a, " 语 言") + └─TableRowIDScan_10 10.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a = ' 语 言' order by a,_tidb_rowid; +a b + 语 言 3579 +explain select * from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["mK\x8b\xd5","mK\x8b\xd6"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 
cop[tikv] like(test.t.a, "测试%", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +a b +测试 222 +测试 543 +测试Abc 324 +explain select * from t use index (ia) where a like '测_' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["mK","mL"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, "测_", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like '测_' order by a,_tidb_rowid; +a b +测试 222 +explain select * from t use index (ia) where a like '测测试 %' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["mKmK\x8b\xd5","mKmK\x8b\xd6"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, "测测试 %", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like '测测试 %' order by a,_tidb_rowid; +a b +测测试 2468 +测测试 99999 +explain select * from t use index (ia) where a like '测试a__' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["mK\x8b\xd5\x00A","mK\x8b\xd5\x00B"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, "测试a__", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep 
order:false, stats:pseudo +select * from t use index (ia) where a like '测试a__' order by a,_tidb_rowid; +a b +测试Abc 324 +explain select * from t use index (ia) where a like '测试 __' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["mK\x8b\xd5","mK\x8b\xd5\x00!"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, "测试 __", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like '测试 __' order by a,_tidb_rowid; +a b +测试 543 +explain select * from t use index (ia) where a like ' _' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["","\x00!"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, " _", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like ' _' order by a,_tidb_rowid; +a b +explain select * from t use index (ia) where a like ' %' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["","\x00 \x00 \x00!"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, " %", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like ' %' order by a,_tidb_rowid; +a b + 66666 +explain select * from t use index 
(ia) where a like ' 语言%%' order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 250.00 root test.t.a, test.t.b +└─Sort_7 250.00 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 250.00 root + ├─IndexRangeScan_9(Build) 250.00 cop[tikv] table:t, index:ia(a, b) range:["\x00 \x8b\xed\x8a\x00","\x00 \x8b\xed\x8a\x01"), keep order:false, stats:pseudo + └─Selection_11(Probe) 250.00 cop[tikv] like(test.t.a, " 语言%%", 92) + └─TableRowIDScan_10 250.00 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a like ' 语言%%' order by a,_tidb_rowid; +a b + 语言 55555 +explain select * from t use index (ia) where a not in ('aabc','dd') order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 3583.33 root test.t.a, test.t.b +└─Sort_7 3583.33 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_12 3583.33 root + ├─IndexRangeScan_9(Build) 3583.33 cop[tikv] table:t, index:ia(a, b) range:(NULL,"\x00D\x00D"), ("\x00D\x00D",+inf], keep order:false, stats:pseudo + └─Selection_11(Probe) 3583.33 cop[tikv] not(in(test.t.a, "aabc", "dd")) + └─TableRowIDScan_10 3583.33 cop[tikv] table:t keep order:false, stats:pseudo +select * from t use index (ia) where a not in ('aabc','dd') order by a,_tidb_rowid; +a b + 234 + 11111 + 66666 + 语 言 3579 + 语言 55555 +A 456 +Aa 456 +aab 456 +aabB 456 +aABBccdd 890 +你好 111 +测测试 2468 +测测试 99999 +测试 222 +测试 543 +测试Abc 324 +explain select * from t where a >= 'aabb' and a <= 'aabd' and b = 456 order by a,_tidb_rowid; +id estRows task access object operator info +Projection_6 0.01 root test.t.a, test.t.b +└─Sort_7 0.01 root test.t.a, test.t._tidb_rowid + └─IndexLookUp_16 0.01 root + ├─Selection_14(Build) 0.01 cop[tikv] eq(test.t.b, 456) + │ └─IndexRangeScan_12 10.00 cop[tikv] table:t, index:ia(a, b) range:["\x00A\x00A\x00B","\x00A\x00A\x00B"], keep order:false, stats:pseudo + └─Selection_15(Probe) 0.01 cop[tikv] ge(test.t.a, "aabb"), le(test.t.a, "aabd") + └─TableRowIDScan_13 0.01 
cop[tikv] table:t keep order:false, stats:pseudo +select * from t where a >= 'aabb' and a <= 'aabd' and b = 456 order by a,_tidb_rowid; +a b +aabB 456 diff --git a/cmd/explaintest/t/range_scan_for_like.test b/cmd/explaintest/t/range_scan_for_like.test new file mode 100644 index 0000000000000..3d868f6f1a739 --- /dev/null +++ b/cmd/explaintest/t/range_scan_for_like.test @@ -0,0 +1,248 @@ +drop table if exists t1, t2; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '),('xxx '); +explain format = brief select *,length(a) from t1 use index (ia) where a like '测试 %'; +explain format = brief select *,length(a) from t1 use index (ia) where a like '测试'; +select *,length(a) from t1 use index (ia) where a like '测试 %'; +select *,length(a) from t1 use index (ia) where a like '测试'; +explain format = brief select * from t1 use index (ia) where a like 'xxx_'; +select * from t1 use index (ia) where a like 'xxx_'; +create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a)); +insert into t2 value('测试'),('测试 '); +explain format = brief select *,length(a) from t2 use index (ia) where a like '测试 %'; +explain format = brief select *,length(a) from t2 use index (ia) where a like '测试'; +select *,length(a) from t2 use index (ia) where a like '测试 %'; +select *,length(a) from t2 use index (ia) where a like '测试'; + +# Suite 1: utf8mb4_general_ci + normal index +drop table if exists t; +create table t(a varchar(20) collate utf8mb4_general_ci, index ia(a)); +insert into t value('测试'),('测试Abc'),('测试 '),('你好'),('aABBccdd'),('Aa'),(''),(' '),(' '),(' 语言'),(' 语 言 '),('测测试 '),('测测试 '),(NULL); +# test cases for the pattern string cover: +# with/without wildcard +# start/end with wildcard +# [non-]ascii characters +# [only] contain empty string/space +explain select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +explain select *, 
length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' ' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' ' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +explain select *, length(a) from t 
use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +drop table t; +# Suite 2: utf8mb4_unicode_ci + unique index +create table t(a varchar(20) collate utf8mb4_unicode_ci, unique index ia(a)); +insert into t value(''),('测试'),('测试abc'),('你好'),('aabbccdd'),(' 语言'),(' 语 言 '),('测测试 '); +# test cases for the pattern string are the same with Suite 1 +explain select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测%%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; 
+select *, length(a) from t use index (ia) where a like '测%%试' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测试%%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '测试_' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '你好%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%cc' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like '' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%dd' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa%%dd' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like 'aa_bccdd' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; 
+select *, length(a) from t use index (ia) where a like '%%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' %%' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' %%语言' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' 语 %' order by a,_tidb_rowid; +explain select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +select *, length(a) from t use index (ia) where a like ' 语 _' order by a,_tidb_rowid; +drop table t; +# Suite 3: utf8mb4_general_ci + ascii_bin + multi-column index + prefix index + primary key (clustered) +create table t(a varchar(20) collate utf8mb4_general_ci, b varchar(20) collate ascii_bin, c bigint, primary key(a(1), b) clustered); +insert into t (a, b, c) values +('测试1', 'asdfgh', 345346), +('你好2', 'qqwweerrrr', 987765), +('こんにちは3', 'zxcvbnn', 1111111), +('안녕하세요4', 'asdfgh ', 3333333333), +('Ciao5', ' asdfgh', 444400), +('Hola6', ' asdfgh ', 6666), +('Bonjour ', '', 888888888), +('Olá8', ' ', 9999999), +('Привет9', ' ', 321321), +('Hallo10', '12345', 35678); +explain select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +explain select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +explain select * from t use index (primary) where a like 'こんにち_' and b like 'zxc%' order by a,b; +select * from t use index (primary) where a like 'こんにち_' and b like 'zxc%' order by a,b; +explain select * from t use index 
(primary) where a like '안녕하세요%' and b like 'asd%' order by a,b; +select * from t use index (primary) where a like '안녕하세요%' and b like 'asd%' order by a,b; +explain select * from t use index (primary) where a like 'Ciáo%' and b like ' _%' order by a,b; +select * from t use index (primary) where a like 'Ciáo%' and b like ' _%' order by a,b; +explain select * from t use index (primary) where a like '%HoLa%' and b like ' asdfgh' order by a,b; +select * from t use index (primary) where a like '%HoLa%' and b like ' asdfgh' order by a,b; +explain select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +explain select * from t use index (primary) where a like 'OLa%' and b like '_' order by a,b; +select * from t use index (primary) where a like 'OLa%' and b like '_' order by a,b; +explain select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +explain select * from t use index (primary) where a like 'Hallo%' and b like '123%' order by a,b; +select * from t use index (primary) where a like 'Hallo%' and b like '123%' order by a,b; +drop table t; +# Suite 4: gbk_chinese_ci + latin1_bin + multi-column index + prefix index + primary key (nonclustered) +create table t(a varchar(20) collate gbk_chinese_ci, b varchar(20) collate latin1_bin, c bigint, primary key(a, b(5)) nonclustered); +insert into t (a, b, c) values +('测试1', 'asdfgh', 345346), +('你好2', 'qqwweerrrr', 987765), +('zxcvbnn',0xE38193E38293E381ABE381A1E381AF33, 1111111), +('asdfgh ', 0xEC9588EB8595ED9598EC84B8EC9A9434, 3333333333), +('Ciao5', ' asdfgh', 444400), +(' asdfgh ', 'Hola6', 6666), +('Bonjour ', '', 888888888), +('Olá8', ' ', 9999999), +('Привет9', ' ', 321321), +(' ', '12345', 35678); +set names utf8mb4; +explain select * from t use index (primary) where a like 
'测试%' and b like 'asd%' order by a,b; +select * from t use index (primary) where a like '测试%' and b like 'asd%' order by a,b; +explain select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +select * from t use index (primary) where a like '测试1' and b like 'asdfgh %' order by a,b; +set names latin1; +explain select * from t use index (primary) where b like 'こんにち_' and a like 'zxc%' order by a,b; +select * from t use index (primary) where b like 'こんにち_' and a like 'zxc%' order by a,b; +explain select * from t use index (primary) where b like '안녕하세요%' and a like 'asd%' order by a,b; +select * from t use index (primary) where b like '안녕하세요%' and a like 'asd%' order by a,b; +set names utf8mb4; +explain select * from t use index (primary) where a like 'Ciao%' and b like ' _%' order by a,b; +select * from t use index (primary) where a like 'Ciao%' and b like ' _%' order by a,b; +explain select * from t use index (primary) where b like 'HoLa%' and a like ' asdfgh' order by a,b; +select * from t use index (primary) where b like 'HoLa%' and a like ' asdfgh' order by a,b; +explain select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +select * from t use index (primary) where a like 'bonjour _%' and b like '' order by a,b; +explain select * from t use index (primary) where a like 'OLá' and b like '_' order by a,b; +select * from t use index (primary) where a like 'OLá' and b like '_' order by a,b; +explain select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +select * from t use index (primary) where a like 'Приве__' and b like ' %' order by a,b; +explain select * from t use index (primary) where a like ' %' and b like '123%' order by a,b; +select * from t use index (primary) where a like ' %' and b like '123%' order by a,b; +drop table t; +# Suite 5: utf8mb4_general_ci + prefix index +create table t(a varchar(20) collate utf8mb4_general_ci, b bigint, index 
ia(a(3),b)); +insert into t value +('测试',222), +('测试Abc',324), +('测试 ',543), +('你好',111), +('aABBccdd',890), +('A',456), +('Aa',456), +('aab',456), +('aabB',456), +('',234), +(' ',11111), +(' ',66666), +(' 语言',55555), +(' 语 言',3579), +('测测试 ',2468), +('测测试 ',99999), +(NULL,10); +explain select * from t use index (ia) where a > 'aabb' order by a,_tidb_rowid; +select * from t use index (ia) where a > 'aabb' order by a,_tidb_rowid; +explain select * from t use index (ia) where a > 'aab' order by a,_tidb_rowid; +select * from t use index (ia) where a > 'aab' order by a,_tidb_rowid; +explain select * from t use index (ia) where a > 'aa' order by a,_tidb_rowid; +select * from t use index (ia) where a > 'aa' order by a,_tidb_rowid; +explain select * from t use index (ia) where a < 'aabb' order by a,_tidb_rowid; +select * from t use index (ia) where a < 'aabb' order by a,_tidb_rowid; +explain select * from t use index (ia) where a < 'aab' order by a,_tidb_rowid; +select * from t use index (ia) where a < 'aab' order by a,_tidb_rowid; +explain select * from t use index (ia) where a < 'aa' order by a,_tidb_rowid; +select * from t use index (ia) where a < 'aa' order by a,_tidb_rowid; +explain select * from t use index (ia) where a != 'aa' order by a,_tidb_rowid; +select * from t use index (ia) where a != 'aa' order by a,_tidb_rowid; +explain select * from t use index (ia) where a != 'aaBbc' order by a,_tidb_rowid; +select * from t use index (ia) where a != 'aaBbc' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测试abc' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测试abc' order by a,_tidb_rowid; +explain select * from t use index (ia) where a = '测试abc' order by a,_tidb_rowid; +select * from t use index (ia) where a = '测试abc' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +select * from t use index (ia) where a like 'aa' order by a,_tidb_rowid; +explain select * from t 
use index (ia) where a = 'aa' order by a,_tidb_rowid; +select * from t use index (ia) where a = 'aa' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测测试 ' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测测试 ' order by a,_tidb_rowid; +explain select * from t use index (ia) where a = '测测试 ' order by a,_tidb_rowid; +select * from t use index (ia) where a = '测测试 ' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like ' 语 言' order by a,_tidb_rowid; +select * from t use index (ia) where a like ' 语 言' order by a,_tidb_rowid; +explain select * from t use index (ia) where a = ' 语 言' order by a,_tidb_rowid; +select * from t use index (ia) where a = ' 语 言' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测试%' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测_' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测_' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测测试 %' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测测试 %' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测试a__' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测试a__' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like '测试 __' order by a,_tidb_rowid; +select * from t use index (ia) where a like '测试 __' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like ' _' order by a,_tidb_rowid; +select * from t use index (ia) where a like ' _' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like ' %' order by a,_tidb_rowid; +select * from t use index (ia) where a like ' %' order by a,_tidb_rowid; +explain select * from t use index (ia) where a like ' 语言%%' order by a,_tidb_rowid; +select * from t use index (ia) where a like ' 
语言%%' order by a,_tidb_rowid; +explain select * from t use index (ia) where a not in ('aabc','dd') order by a,_tidb_rowid; +select * from t use index (ia) where a not in ('aabc','dd') order by a,_tidb_rowid; +explain select * from t where a >= 'aabb' and a <= 'aabd' and b = 456 order by a,_tidb_rowid; +select * from t where a >= 'aabb' and a <= 'aabd' and b = 456 order by a,_tidb_rowid; diff --git a/executor/point_get.go b/executor/point_get.go index 55062cda824e8..985b8501a3a68 100644 --- a/executor/point_get.go +++ b/executor/point_get.go @@ -540,11 +540,13 @@ func EncodeUniqueIndexValuesForKey(ctx sessionctx.Context, tblInfo *model.TableI colInfo := tblInfo.Columns[idxInfo.Columns[i].Offset] // table.CastValue will append 0x0 if the string value's length is smaller than the BINARY column's length. // So we don't use CastValue for string value for now. - // TODO: merge two if branch. + // TODO: The first if branch should have been removed, because the functionality of set the collation of the datum + // have been moved to util/ranger (normal path) and getNameValuePairs/getPointGetValue (fast path). But this change + // will be cherry-picked to a hotfix, so we choose to be a bit conservative and keep this for now. 
if colInfo.GetType() == mysql.TypeString || colInfo.GetType() == mysql.TypeVarString || colInfo.GetType() == mysql.TypeVarchar { var str string str, err = idxVals[i].ToString() - idxVals[i].SetString(str, colInfo.FieldType.GetCollate()) + idxVals[i].SetString(str, idxVals[i].Collation()) } else if colInfo.GetType() == mysql.TypeEnum && (idxVals[i].Kind() == types.KindString || idxVals[i].Kind() == types.KindBytes || idxVals[i].Kind() == types.KindBinaryLiteral) { var str string var e types.Enum diff --git a/planner/core/casetest/testdata/derive_topn_from_window_out.json b/planner/core/casetest/testdata/derive_topn_from_window_out.json index ff5da33b16016..b29d2f5d88596 100644 --- a/planner/core/casetest/testdata/derive_topn_from_window_out.json +++ b/planner/core/casetest/testdata/derive_topn_from_window_out.json @@ -440,7 +440,7 @@ " └─TableReader 1.11 root data:TopN", " └─TopN 1.11 cop[tikv] partition by test.customer.primary_key, test.customer.secondary_key order by test.customer.c_timestamp, offset:0, count:10", " └─Selection 1.11 cop[tikv] ge(test.customer.c_timestamp, 1661883508511000000)", - " └─TableRangeScan 33.33 cop[tikv] table:customer range:[0x0002 0x0001,0x0002 +inf], keep order:false, stats:pseudo" + " └─TableRangeScan 33.33 cop[tikv] table:customer range:[\"\\x00\\x02\" \"\\x00\\x01\",\"\\x00\\x02\" +inf], keep order:false, stats:pseudo" ], "Res": null }, diff --git a/planner/core/casetest/testdata/plan_suite_out.json b/planner/core/casetest/testdata/plan_suite_out.json index 8fda11bb0f3a6..24ccb637277ac 100644 --- a/planner/core/casetest/testdata/plan_suite_out.json +++ b/planner/core/casetest/testdata/plan_suite_out.json @@ -2787,11 +2787,11 @@ }, { "SQL": "select a from t where c_str like ''", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]]->Sel([like(test.t.c_str, , 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc'", - "Best": 
"IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]]->Sel([like(test.t.c_str, abc, 92)]))->Projection" }, { "SQL": "select a from t where c_str not like 'abc'", @@ -2807,11 +2807,11 @@ }, { "SQL": "select a from t where c_str like 'abc%'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc%, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc_'", - "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc%af'", @@ -2819,31 +2819,31 @@ }, { "SQL": "select a from t where c_str like 'abc\\_' escape ''", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\_'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\\\_'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\_%'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\_%, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc=_%' escape '='", - "Best": 
"IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc=_%, 61)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\__'", - "Best": "IndexReader(Index(t.c_d_e_str)[(\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\__, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 123", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]]->Sel([like(test.t.c_str, 123, 92)]))->Projection" }, { "SQL": "select a from t where c = 1.9 and d > 3", @@ -8006,21 +8006,21 @@ " │ └─StreamAgg 249.75 root funcs:max(test.tc.id)->Column#14", " │ └─TopN 62.38 root test.tc.id:desc, offset:0, count:1", " │ └─IndexLookUp 62.38 root ", - " │ ├─Selection(Build) 62.44 cop[tikv] eq(test.ta.name, test.tc.name)", + " │ ├─Selection(Build) 62.38 cop[tikv] eq(test.ta.name, test.tc.name), like(test.tc.name, \"chad99%\", 92)", " │ │ └─IndexRangeScan 62437.50 cop[tikv] table:tc, index:idx_tc_name(name) range:[\"chad99\",\"chad9:\"), keep order:false, stats:pseudo", " │ └─TopN(Probe) 62.38 cop[tikv] test.tc.id:desc, offset:0, count:1", " │ └─Selection 62.38 cop[tikv] not(isnull(test.tc.id))", - " │ └─TableRowIDScan 62.44 cop[tikv] table:tc keep order:false, stats:pseudo", + " │ └─TableRowIDScan 62.38 cop[tikv] table:tc keep order:false, stats:pseudo", " └─Selection(Probe) 199.80 root gt(Column#19, 100)", " └─MaxOneRow 249.75 root ", " └─StreamAgg 249.75 root funcs:max(test.td.id)->Column#19", - " └─Limit 62.38 root offset:0, count:1", - " └─Projection 62.38 root test.td.id, test.td.name", - " └─IndexLookUp 62.38 root ", - " ├─Selection(Build) 2495.00 cop[tikv] eq(test.ta.id, test.td.id)", - " │ └─IndexFullScan 2495002.50 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, 
desc, stats:pseudo", - " └─Selection(Probe) 62.38 cop[tikv] like(test.td.name, \"chad999%\", 92)", - " └─TableRowIDScan 2495.00 cop[tikv] table:td keep order:false, stats:pseudo" + " └─TopN 62.38 root test.td.id:desc, offset:0, count:1", + " └─IndexLookUp 62.38 root ", + " ├─Selection(Build) 1560.94 cop[tikv] like(test.td.name, \"chad999%\", 92)", + " │ └─IndexRangeScan 62437.50 cop[tikv] table:td, index:idx_tc_name(name) range:[\"chad999\",\"chad99:\"), keep order:false, stats:pseudo", + " └─TopN(Probe) 62.38 cop[tikv] test.td.id:desc, offset:0, count:1", + " └─Selection 62.38 cop[tikv] eq(test.ta.id, test.td.id), not(isnull(test.td.id))", + " └─TableRowIDScan 1560.94 cop[tikv] table:td keep order:false, stats:pseudo" ], "Result": null, "Warning": null @@ -8034,29 +8034,31 @@ " │ ├─Apply(Build) 10000.00 root CARTESIAN semi join", " │ │ ├─TableReader(Build) 10000.00 root data:TableFullScan", " │ │ │ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo", - " │ │ └─TableReader(Probe) 2500.00 root data:Selection", - " │ │ └─Selection 2500.00 cop[tikv] eq(test.ta.code, test.tb.code), like(test.tb.name, \"chad9%\", 92)", - " │ │ └─TableFullScan 100000000.00 cop[tikv] table:tb keep order:false, stats:pseudo", + " │ │ └─IndexLookUp(Probe) 2500.00 root ", + " │ │ ├─Selection(Build) 62500.00 cop[tikv] like(test.tb.name, \"chad9%\", 92)", + " │ │ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tb, index:idx_tb_name(name) range:[\"chad9\",\"chad:\"), keep order:false, stats:pseudo", + " │ │ └─Selection(Probe) 2500.00 cop[tikv] eq(test.ta.code, test.tb.code)", + " │ │ └─TableRowIDScan 62500.00 cop[tikv] table:tb keep order:false, stats:pseudo", " │ └─Selection(Probe) 8000.00 root gt(Column#14, 100)", " │ └─MaxOneRow 10000.00 root ", " │ └─StreamAgg 10000.00 root funcs:max(test.tc.id)->Column#14", " │ └─TopN 2497.50 root test.tc.id:desc, offset:0, count:1", " │ └─IndexLookUp 2497.50 root ", - " │ ├─Selection(Build) 2500.00 cop[tikv] eq(test.ta.name, 
test.tc.name)", + " │ ├─Selection(Build) 2497.50 cop[tikv] eq(test.ta.name, test.tc.name), like(test.tc.name, \"chad99%\", 92)", " │ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tc, index:idx_tc_name(name) range:[\"chad99\",\"chad9:\"), keep order:false, stats:pseudo", " │ └─TopN(Probe) 2497.50 cop[tikv] test.tc.id:desc, offset:0, count:1", " │ └─Selection 2497.50 cop[tikv] not(isnull(test.tc.id))", - " │ └─TableRowIDScan 2500.00 cop[tikv] table:tc keep order:false, stats:pseudo", + " │ └─TableRowIDScan 2497.50 cop[tikv] table:tc keep order:false, stats:pseudo", " └─Selection(Probe) 8000.00 root gt(Column#19, 100)", " └─MaxOneRow 10000.00 root ", " └─StreamAgg 10000.00 root funcs:max(test.td.id)->Column#19", - " └─Limit 2497.50 root offset:0, count:1", - " └─Projection 2497.50 root test.td.id, test.td.name", - " └─IndexLookUp 2497.50 root ", - " ├─Selection(Build) 99900.00 cop[tikv] eq(test.ta.id, test.td.id)", - " │ └─IndexFullScan 99900000.00 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo", - " └─Selection(Probe) 2497.50 cop[tikv] like(test.td.name, \"chad999%\", 92)", - " └─TableRowIDScan 99900.00 cop[tikv] table:td keep order:false, stats:pseudo" + " └─TopN 2497.50 root test.td.id:desc, offset:0, count:1", + " └─IndexLookUp 2497.50 root ", + " ├─Selection(Build) 62500.00 cop[tikv] like(test.td.name, \"chad999%\", 92)", + " │ └─IndexRangeScan 2500000.00 cop[tikv] table:td, index:idx_tc_name(name) range:[\"chad999\",\"chad99:\"), keep order:false, stats:pseudo", + " └─TopN(Probe) 2497.50 cop[tikv] test.td.id:desc, offset:0, count:1", + " └─Selection 2497.50 cop[tikv] eq(test.ta.id, test.td.id), not(isnull(test.td.id))", + " └─TableRowIDScan 62500.00 cop[tikv] table:td keep order:false, stats:pseudo" ], "Result": null, "Warning": null diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 7bf64614c4a2b..44ab84812b616 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ 
-3535,13 +3535,13 @@ func TestIssues29711(t *testing.T) { "`col_251` enum('Alice','Bob','Charlie','David') COLLATE utf8_unicode_ci NOT NULL DEFAULT 'Charlie'," + "PRIMARY KEY (`col_251`,`col_250`(1)) NONCLUSTERED);") tk.MustQuery("explain format=brief " + - "select col_250,col_251 from tbl_29711 where col_251 between 'Bob' and 'David' order by col_250,col_251 limit 6;"). + "select col_250,col_251 from tbl_29711 use index (primary) where col_251 between 'Bob' and 'David' order by col_250,col_251 limit 6;"). Check(testkit.Rows( "TopN 6.00 root test.tbl_29711.col_250, test.tbl_29711.col_251, offset:0, count:6", "└─IndexLookUp 6.00 root ", - " ├─IndexRangeScan(Build) 30.00 cop[tikv] table:tbl_29711, index:PRIMARY(col_251, col_250) range:[\"Bob\",\"Bob\"], [\"Charlie\",\"Charlie\"], [\"David\",\"David\"], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 250.00 cop[tikv] table:tbl_29711, index:PRIMARY(col_251, col_250) range:[\"Bob\",\"David\"], keep order:false, stats:pseudo", " └─TopN(Probe) 6.00 cop[tikv] test.tbl_29711.col_250, test.tbl_29711.col_251, offset:0, count:6", - " └─TableRowIDScan 30.00 cop[tikv] table:tbl_29711 keep order:false, stats:pseudo", + " └─TableRowIDScan 250.00 cop[tikv] table:tbl_29711 keep order:false, stats:pseudo", )) tk.MustExec("drop table if exists t29711") @@ -5015,14 +5015,14 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) { tk.MustExec("drop table if exists t1, t2") tk.MustExec("create table t1(a int, b varchar(10), c varchar(10), index idx_a_b(a, b))") tk.MustExec("create table t2(d int)") - tk.MustExec("set @@tidb_opt_range_max_size=1275") - // 1275 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc]. + tk.MustExec("set @@tidb_opt_range_max_size=1260") + // 1260 is enough for [? a,? a], [? b,? b], [? c,? c] but is not enough for [? aaaaaa,? aaaaaa], [? bbbbbb,? bbbbbb], [? cccccc,? cccccc]. 
rows := tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('a', 'b', 'c')").Rows() require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d) in(test.t1.b, a, b, c)]")) tk.MustQuery("show warnings").Check(testkit.Rows()) rows = tk.MustQuery("explain format='brief' select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in ('aaaaaa', 'bbbbbb', 'cccccc');").Rows() require.True(t, strings.Contains(rows[6][4].(string), "range: decided by [eq(test.t1.a, test.t2.d)]")) - tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen")) + tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen")) tk.MustExec("prepare stmt1 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?)'") tk.MustExec("set @a='a', @b='b', @c='c'") @@ -5043,7 +5043,7 @@ func TestPlanCacheForIndexJoinRangeFallback(t *testing.T) { tk.MustExec("prepare stmt2 from 'select /*+ inl_join(t1) */ * from t1 join t2 on t1.a = t2.d where t1.b in (?, ?, ?, ?, ?)'") tk.MustExec("set @a='a', @b='b', @c='c', @d='d', @e='e'") tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e") - tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1275 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen", + tk.MustQuery("show warnings").Sort().Check(testkit.Rows("Warning 1105 Memory capacity of 1260 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. 
Less accurate ranges such as full range are chosen", "Warning 1105 skip prepared plan-cache: in-list is too long")) tk.MustExec("execute stmt2 using @a, @b, @c, @d, @e") tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("0")) diff --git a/planner/core/plan_test.go b/planner/core/plan_test.go index 1d293d704aeb4..f0235d8043680 100644 --- a/planner/core/plan_test.go +++ b/planner/core/plan_test.go @@ -605,8 +605,9 @@ func TestIssue25729(t *testing.T) { for i := 0; i < 10; i++ { tk.MustQuery("explain format='brief' select * from t1 where concat(a, b) like \"aadwa\" and a = \"a\";").Check(testkit.Rows( "Projection 0.10 root test.t1.a, test.t1.b", - "└─IndexReader 0.10 root index:IndexRangeScan", - " └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:[\"a\" \"aadwa\",\"a\" \"aadwa\"], keep order:false, stats:pseudo")) + "└─IndexReader 0.10 root index:Selection", + " └─Selection 0.10 cop[tikv] like(concat(test.t1.a, test.t1.b), \"aadwa\", 92)", + " └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:[\"a\" \"aadwa\",\"a\" \"aadwa\"], keep order:false, stats:pseudo")) tk.MustQuery("explain format='brief' select b from t1 where concat(a, b) >= \"aa\" and a = \"b\";").Check(testkit.Rows( "Projection 33.33 root test.t1.b", diff --git a/planner/core/point_get_plan.go b/planner/core/point_get_plan.go index 5f670f224a5cd..c31c4ee9847a1 100644 --- a/planner/core/point_get_plan.go +++ b/planner/core/point_get_plan.go @@ -1436,7 +1436,15 @@ func getNameValuePairs(ctx sessionctx.Context, tbl *model.TableInfo, tblName mod col := model.FindColumnInfo(tbl.Cols(), colName.Name.Name.L) if col == nil { // Handling the case when the column is _tidb_rowid. 
return append(nvPairs, nameValuePair{colName: colName.Name.Name.L, colFieldType: types.NewFieldType(mysql.TypeLonglong), value: d, con: con}), false - } else if col.GetType() == mysql.TypeString && col.GetCollate() == charset.CollationBin { // This type we needn't to pad `\0` in here. + } + + // As in buildFromBinOp in util/ranger, when we build key from the expression to do range scan or point get on + // a string column, we should set the collation of the string datum to collation of the column. + if col.FieldType.EvalType() == types.ETString && (d.Kind() == types.KindString || d.Kind() == types.KindBinaryLiteral) { + d.SetString(d.GetString(), col.FieldType.GetCollate()) + } + + if col.GetType() == mysql.TypeString && col.GetCollate() == charset.CollationBin { // This type we needn't to pad `\0` in here. return append(nvPairs, nameValuePair{colName: colName.Name.Name.L, colFieldType: &col.FieldType, value: d, con: con}), false } if !checkCanConvertInPointGet(col, d) { @@ -1466,6 +1474,11 @@ func getPointGetValue(stmtCtx *stmtctx.StatementContext, col *model.ColumnInfo, if !checkCanConvertInPointGet(col, *d) { return nil } + // As in buildFromBinOp in util/ranger, when we build key from the expression to do range scan or point get on + // a string column, we should set the collation of the string datum to collation of the column. 
+ if col.FieldType.EvalType() == types.ETString && (d.Kind() == types.KindString || d.Kind() == types.KindBinaryLiteral) { + d.SetString(d.GetString(), col.FieldType.GetCollate()) + } dVal, err := d.ConvertTo(stmtCtx, &col.FieldType) if err != nil { return nil diff --git a/planner/core/testdata/index_merge_suite_out.json b/planner/core/testdata/index_merge_suite_out.json index 2c66948aca057..b81d88dd08dc3 100644 --- a/planner/core/testdata/index_merge_suite_out.json +++ b/planner/core/testdata/index_merge_suite_out.json @@ -314,7 +314,7 @@ "Plan": [ "Selection 8.00 root json_memberof(cast(\"a\", json BINARY), json_extract(test.t.j0, \"$.path_string\"))", "└─IndexMerge 10.00 root type: union", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x61,0x61], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"a\",\"a\"], keep order:false, stats:pseudo", " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] }, @@ -323,7 +323,7 @@ "Plan": [ "Selection 8.00 root json_memberof(cast(\"a\", json BINARY), json_extract(test.t.j0, \"$.path_string\"))", "└─IndexMerge 3.32 root type: union", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x61,0x61], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"a\",\"a\"], keep order:false, stats:pseudo", " └─Selection(Probe) 3.32 cop[tikv] lt(test.t.a, 10)", " └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] @@ -332,9 +332,9 @@ "SQL": "select /*+ use_index_merge(t, j0_string) */ * from t where json_contains((j0->'$.path_string'), '[\"a\", \"b\", 
\"c\"]')", "Plan": [ "IndexMerge 10.00 root type: intersection", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x61,0x61], keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x62,0x62], keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x63,0x63], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"a\",\"a\"], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"b\",\"b\"], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"c\",\"c\"], keep order:false, stats:pseudo", "└─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] }, @@ -342,9 +342,9 @@ "SQL": "select /*+ use_index_merge(t, j0_string) */ * from t where json_contains((j0->'$.path_string'), '[\"a\", \"b\", \"c\"]') and a<10", "Plan": [ "IndexMerge 3.32 root type: intersection", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x61,0x61], keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x62,0x62], keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') 
as char(10) array)) range:[0x63,0x63], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"a\",\"a\"], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"b\",\"b\"], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"c\",\"c\"], keep order:false, stats:pseudo", "└─Selection(Probe) 3.32 cop[tikv] lt(test.t.a, 10)", " └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] @@ -354,9 +354,9 @@ "Plan": [ "Selection 8.00 root json_overlaps(json_extract(test.t.j0, \"$.path_string\"), cast(\"[\"a\", \"b\", \"c\"]\", json BINARY))", "└─IndexMerge 10.00 root type: union", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x61,0x61], keep order:false, stats:pseudo", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x62,0x62], keep order:false, stats:pseudo", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x63,0x63], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"a\",\"a\"], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"b\",\"b\"], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, 
index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"c\",\"c\"], keep order:false, stats:pseudo", " └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] }, @@ -365,9 +365,9 @@ "Plan": [ "Selection 8.00 root json_overlaps(json_extract(test.t.j0, \"$.path_string\"), cast(\"[\"a\", \"b\", \"c\"]\", json BINARY))", "└─IndexMerge 3.32 root type: union", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x61,0x61], keep order:false, stats:pseudo", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x62,0x62], keep order:false, stats:pseudo", - " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[0x63,0x63], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"a\",\"a\"], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"b\",\"b\"], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_string(cast(json_extract(`j0`, _utf8mb4'$.path_string') as char(10) array)) range:[\"c\",\"c\"], keep order:false, stats:pseudo", " └─Selection(Probe) 3.32 cop[tikv] lt(test.t.a, 10)", " └─TableRowIDScan 10.00 cop[tikv] table:t keep order:false, stats:pseudo" ] @@ -479,7 +479,7 @@ "Plan": [ "Selection 0.00 root json_memberof(cast(\"3\", json BINARY), json_extract(test.t.j, \"$.str\"))", "└─IndexMerge 0.00 root type: union", - " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(json_extract(`j`, _utf8mb4'$.str') as char(10) array), c) 
range:[1 2 0x33 4,1 2 0x33 4], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(json_extract(`j`, _utf8mb4'$.str') as char(10) array), c) range:[1 2 \"3\" 4,1 2 \"3\" 4], keep order:false, stats:pseudo", " └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo" ] }, @@ -488,7 +488,7 @@ "Plan": [ "Selection 0.08 root json_memberof(cast(\"3\", json BINARY), json_extract(test.t.j, \"$.str\"))", "└─IndexMerge 0.00 root type: union", - " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(json_extract(`j`, _utf8mb4'$.str') as char(10) array), c) range:[1 2 0x33,1 2 0x33], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 0.00 cop[tikv] table:t, index:idx2(a, b, cast(json_extract(`j`, _utf8mb4'$.str') as char(10) array), c) range:[1 2 \"3\",1 2 \"3\"], keep order:false, stats:pseudo", " └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo" ] }, @@ -884,8 +884,8 @@ "IndexMerge 0.00 root type: intersection", "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is1(s1) range:[\"Abc\",\"Abc\"], keep order:false, stats:pseudo", "├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t5, index:is2(s2) range:(\"zzz\",+inf], keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"B啊a\"), keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CcC\",\"CcC\"], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t5, index:is3(s3) range:[-inf,\"\\x0eJ\\xfb@\\xd5J\\x0e3\"), keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t5, index:is4(s4) range:[\"CCC\",\"CCC\"], keep order:false, stats:pseudo", "└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t5 keep order:false, stats:pseudo" ], "Result": [ @@ -897,7 +897,7 @@ "Plan": [ "IndexMerge 0.03 root type: intersection", 
"├─IndexRangeScan(Build) 33.33 cop[tikv] table:t6, index:PRIMARY(s1, s2) range:(\"Abc\" \"zzz\",\"Abc\" +inf], keep order:false, stats:pseudo", - "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"A啊a\",\"A啊a\"], keep order:false, stats:pseudo", + "├─IndexRangeScan(Build) 10.00 cop[tikv] table:t6, index:is3(s3) range:[\"\\x0e3\\xfb@\\xd5J\\x0e3\",\"\\x0e3\\xfb@\\xd5J\\x0e3\"], keep order:false, stats:pseudo", "└─Selection(Probe) 0.03 cop[tikv] gt(test.t6.s2, \"zzz\"), not(like(test.t6.s4, \"Cd_\", 92))", " └─TableRowIDScan 0.03 cop[tikv] table:t6 keep order:false, stats:pseudo" ], @@ -925,13 +925,14 @@ { "SQL": "select /*+ use_index_merge(t8, primary,is2,is3,is4,is5) */ * from t8 where s1 like '啊A%' and s2 > 'abc' and s3 > 'cba' and s4 in ('aA', '??') and s5 = 'test,2'", "Plan": [ - "Selection 1.42 root eq(test.t8.s5, \"test,2\")", - "└─IndexMerge 0.59 root type: intersection", - " ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(0x616263,+inf], keep order:false, stats:pseudo", - " ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(0x636261,+inf], keep order:false, stats:pseudo", + "Selection 0.04 root eq(test.t8.s5, \"test,2\")", + "└─IndexMerge 0.06 root type: intersection", + " ├─IndexRangeScan(Build) 250.00 cop[tikv] table:t8, index:PRIMARY(s1) range:[\"UJ\\x00A\",\"UJ\\x00B\"), keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is2(s2) range:(\"abc\",+inf], keep order:false, stats:pseudo", + " ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t8, index:is3(s3) range:(\"cba\",+inf], keep order:false, stats:pseudo", " ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t8, index:is4(s4) range:[\"aA\",\"aA\"], [\"??\",\"??\"], keep order:false, stats:pseudo", - " └─Selection(Probe) 0.59 cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)", - " └─TableRowIDScan 2.22 cop[tikv] table:t8 keep order:false, stats:pseudo" + " └─Selection(Probe) 0.06 
cop[tikv] gt(test.t8.s3, \"cba\"), like(test.t8.s1, \"啊A%\", 92)", + " └─TableRowIDScan 0.06 cop[tikv] table:t8 keep order:false, stats:pseudo" ], "Result": [ "啊aabbccdd abcc cccc aA tEsT,2" diff --git a/util/ranger/BUILD.bazel b/util/ranger/BUILD.bazel index be470b8c15d6c..48a8e6a5a5c23 100644 --- a/util/ranger/BUILD.bazel +++ b/util/ranger/BUILD.bazel @@ -30,6 +30,7 @@ go_library( "//util/codec", "//util/collate", "//util/dbterror", + "//util/hack", "//util/mathutil", "@com_github_pingcap_errors//:errors", "@org_golang_x_exp//slices", diff --git a/util/ranger/checker.go b/util/ranger/checker.go index 0468d3c92472e..96c10f36d25a9 100644 --- a/util/ranger/checker.go +++ b/util/ranger/checker.go @@ -18,12 +18,14 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/parser/ast" "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/collate" ) // conditionChecker checks if this condition can be pushed to index planner. type conditionChecker struct { + ctx sessionctx.Context checkerCol *expression.Column length int optPrefixIndexSingleScan bool @@ -139,16 +141,6 @@ func (c *conditionChecker) checkScalarFunction(scalar *expression.ScalarFunction func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isAccessCond, shouldReserve bool) { _, collation := scalar.CharsetAndCollation() - if collate.NewCollationEnabled() && !collate.IsBinCollation(collation) { - // The algorithm constructs the range in byte-level: for example, ab% is mapped to [ab, ac] by adding 1 to the last byte. - // However, this is incorrect for non-binary collation strings because the sort key order is not the same as byte order. - // For example, "`%" is mapped to the range [`, a](where ` is 0x60 and a is 0x61). - // Because the collation utf8_general_ci is case-insensitive, a and A have the same sort key. 
- // Finally, the range comes to be [`, A], which is actually an empty range. - // See https://github.com/pingcap/tidb/issues/31174 for more details. - // In short, when the column type is non-binary collation string, we cannot use `like` expressions to generate the range. - return false, true - } if !collate.CompatibleCollate(scalar.GetArgs()[0].GetType().GetCollate(), collation) { return false, true } @@ -166,11 +158,20 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA if err != nil { return false, true } + likeFuncReserve := !c.isFullLengthColumn() + + // Different from `=`, trailing spaces are always significant, and can't be ignored in `like`. + // In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are + // unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by + // the `like` function. Therefore, a Selection is needed to filter the data. + if isPadSpaceCollation(collation) { + likeFuncReserve = true + } + if len(patternStr) == 0 { - return true, !c.isFullLengthColumn() + return true, likeFuncReserve } escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64()) - likeFuncReserve := !c.isFullLengthColumn() for i := 0; i < len(patternStr); i++ { if patternStr[i] == escape { i++ diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 8ac79b9502ba8..1dff56d3c23a6 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -242,7 +242,7 @@ func compareCNFItemRangeResult(curResult, bestResult *cnfItemRangeResult) (curIs // e.g, for input CNF expressions ((a,b) in ((1,1),(2,2))) and a > 1 and ((a,b,c) in (1,1,1),(2,2,2)) // ((a,b,c) in (1,1,1),(2,2,2)) would be extracted. 
func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expression, cols []*expression.Column, - lengths []int, rangeMaxSize int64) (*cnfItemRangeResult, []*valueInfo, error) { + lengths []int, rangeMaxSize int64, convertToSortKey bool) (*cnfItemRangeResult, []*valueInfo, error) { if len(conds) < 2 { return nil, nil, nil } @@ -261,7 +261,7 @@ func extractBestCNFItemRanges(sctx sessionctx.Context, conds []expression.Expres // We build ranges for `(a,b) in ((1,1),(1,2))` and get `[1 1, 1 1] [1 2, 1 2]`, which are point ranges and we can // append `c = 1` to the point ranges. However, if we choose to merge consecutive ranges here, we get `[1 1, 1 2]`, // which are not point ranges, and we cannot append `c = 1` anymore. - res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize) + res, err := detachCondAndBuildRangeWithoutMerging(sctx, tmpConds, cols, lengths, rangeMaxSize, convertToSortKey) if err != nil { return nil, nil, err } @@ -304,7 +304,7 @@ func unionColumnValues(lhs, rhs []*valueInfo) []*valueInfo { // detachCNFCondAndBuildRangeForIndex will detach the index filters from table filters. These conditions are connected with `and` // It will first find the point query column and then extract the range query column. // considerDNF is true means it will try to extract access conditions from the DNF expressions. 
-func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expression.Expression, tpSlice []*types.FieldType, considerDNF bool) (*DetachRangeResult, error) { +func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expression.Expression, newTpSlice []*types.FieldType, considerDNF bool) (*DetachRangeResult, error) { var ( eqCount int ranges Ranges @@ -317,7 +317,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return res, nil } var remainedConds []expression.Expression - ranges, accessConds, remainedConds, err = d.buildRangeOnColsByCNFCond(tpSlice, len(accessConds), accessConds) + ranges, accessConds, remainedConds, err = d.buildRangeOnColsByCNFCond(newTpSlice, len(accessConds), accessConds) if err != nil { return nil, err } @@ -337,7 +337,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi // Therefore, we need to calculate pointRanges separately so that it can be used to append tail ranges in considerDNF branch. // See https://github.com/pingcap/tidb/issues/26029 for details. 
var pointRanges Ranges - if hasPrefix(d.lengths) && fixPrefixColRange(ranges, d.lengths, tpSlice) { + if hasPrefix(d.lengths) { if d.mergeConsecutive { pointRanges = make(Ranges, 0, len(ranges)) for _, ran := range ranges { @@ -371,12 +371,13 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi return res, nil } checker := &conditionChecker{ + ctx: d.sctx, checkerCol: d.cols[eqOrInCount], length: d.lengths[eqOrInCount], optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } if considerDNF { - bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize) + bestCNFItemRes, columnValues, err := extractBestCNFItemRanges(d.sctx, conditions, d.cols, d.lengths, d.rangeMaxSize, d.convertToSortKey) if err != nil { return nil, err } @@ -456,7 +457,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi } // `eqOrInCount` must be 0 when coming here. res.AccessConds, res.RemainedConds = detachColumnCNFConditions(d.sctx, newConditions, checker) - ranges, res.AccessConds, remainedConds, err = d.buildCNFIndexRange(tpSlice, 0, res.AccessConds) + ranges, res.AccessConds, remainedConds, err = d.buildCNFIndexRange(newTpSlice, 0, res.AccessConds) if err != nil { return nil, err } @@ -477,7 +478,7 @@ func (d *rangeDetacher) detachCNFCondAndBuildRangeForIndex(conditions []expressi accessConds = append(accessConds, cond) // TODO: if it's prefix column, we need to add cond to filterConds? 
} - ranges, accessConds, remainedConds, err = d.buildCNFIndexRange(tpSlice, eqOrInCount, accessConds) + ranges, accessConds, remainedConds, err = d.buildCNFIndexRange(newTpSlice, eqOrInCount, accessConds) if err != nil { return nil, err } @@ -612,7 +613,7 @@ func extractValueInfo(expr expression.Expression) *valueInfo { func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, []*valueInfo, bool) { var filters []expression.Expression - rb := builder{sc: sctx.GetSessionVars().StmtCtx} + rb := builder{sctx: sctx} accesses := make([]expression.Expression, len(cols)) points := make([][]*point, len(cols)) mergedAccesses := make([]expression.Expression, len(cols)) @@ -631,12 +632,16 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex } // Multiple Eq/In conditions for one column in CNF, apply intersection on them // Lazily compute the points for the previously visited Eq/In + newTp := newFieldType(cols[offset].GetType()) collator := collate.GetCollator(cols[offset].GetType().GetCollate()) if mergedAccesses[offset] == nil { mergedAccesses[offset] = accesses[offset] - points[offset] = rb.build(accesses[offset], collator) + // Note that this is a relatively special usage of build(). We will restore the points back to Expression for + // later use and may build the Expression to points again. + // We need to keep the original value here, which means we neither cut prefix nor convert to sort key. 
+ points[offset] = rb.build(accesses[offset], newTp, types.UnspecifiedLength, false) } - points[offset] = rb.intersection(points[offset], rb.build(cond, collator), collator) + points[offset] = rb.intersection(points[offset], rb.build(cond, newTp, types.UnspecifiedLength, false), collator) if len(points[offset]) == 0 { // Early termination if false expression found if expression.MaybeOverOptimized4PlanCache(sctx, conditions) { // `a>@x and a<@y` --> `invalid-range if @x>=@y` @@ -714,11 +719,12 @@ func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex // We will detach the conditions of every DNF items, then compose them to a DNF. func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression.ScalarFunction, newTpSlice []*types.FieldType) (Ranges, []expression.Expression, []*valueInfo, bool, error) { firstColumnChecker := &conditionChecker{ + ctx: d.sctx, checkerCol: d.cols[0], length: d.lengths[0], optPrefixIndexSingleScan: d.sctx.GetSessionVars().OptPrefixIndexSingleScan, } - rb := builder{sc: d.sctx.GetSessionVars().StmtCtx} + rb := builder{sctx: d.sctx} dnfItems := expression.FlattenDNFConditions(condition) newAccessItems := make([]expression.Expression, 0, len(dnfItems)) var totalRanges Ranges @@ -776,9 +782,10 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition *expression if shouldReserve { hasResidual = true } - points := rb.build(item, collate.GetCollator(newTpSlice[0].GetCollate())) + points := rb.build(item, newTpSlice[0], d.lengths[0], d.convertToSortKey) + tmpNewTp := convertStringFTToBinaryCollate(newTpSlice[0]) // TODO: restrict the mem usage of ranges - ranges, rangeFallback, err := points2Ranges(d.sctx, points, newTpSlice[0], d.rangeMaxSize) + ranges, rangeFallback, err := points2Ranges(d.sctx, points, tmpNewTp, d.rangeMaxSize) if err != nil { return nil, nil, nil, false, errors.Trace(err) } @@ -808,10 +815,6 @@ func (d *rangeDetacher) detachDNFCondAndBuildRangeForIndex(condition 
*expression } } - // Take prefix index into consideration. - if hasPrefix(d.lengths) { - fixPrefixColRange(totalRanges, d.lengths, newTpSlice) - } totalRanges, err := UnionRanges(d.sctx, totalRanges, d.mergeConsecutive) if err != nil { return nil, nil, nil, false, errors.Trace(err) @@ -874,6 +877,7 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre cols: cols, lengths: lengths, mergeConsecutive: true, + convertToSortKey: true, rangeMaxSize: rangeMaxSize, } return d.detachCondAndBuildRangeForCols() @@ -882,13 +886,14 @@ func DetachCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []expre // detachCondAndBuildRangeWithoutMerging detaches the index filters from table filters and uses them to build ranges. // When building ranges, it doesn't merge consecutive ranges. func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, - lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) { + lengths []int, rangeMaxSize int64, convertToSortKey bool) (*DetachRangeResult, error) { d := &rangeDetacher{ sctx: sctx, allConds: conditions, cols: cols, lengths: lengths, mergeConsecutive: false, + convertToSortKey: convertToSortKey, rangeMaxSize: rangeMaxSize, } return d.detachCondAndBuildRangeForCols() @@ -900,7 +905,7 @@ func detachCondAndBuildRangeWithoutMerging(sctx sessionctx.Context, conditions [ // The returned values are encapsulated into a struct DetachRangeResult, see its comments for explanation. 
func DetachCondAndBuildRangeForPartition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, lengths []int, rangeMaxSize int64) (*DetachRangeResult, error) { - return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize) + return detachCondAndBuildRangeWithoutMerging(sctx, conditions, cols, lengths, rangeMaxSize, false) } type rangeDetacher struct { @@ -909,6 +914,7 @@ type rangeDetacher struct { cols []*expression.Column lengths []int mergeConsecutive bool + convertToSortKey bool rangeMaxSize int64 } @@ -955,6 +961,7 @@ func DetachSimpleCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions [ cols: cols, lengths: lengths, mergeConsecutive: true, + convertToSortKey: true, rangeMaxSize: rangeMaxSize, } res, err := d.detachCNFCondAndBuildRangeForIndex(conditions, newTpSlice, false) @@ -986,6 +993,7 @@ func AppendConditionsIfNotExist(conditions, condsToAppend []expression.Expressio // we don't need to return the remained filter conditions, it is much simpler than DetachCondsForColumn. func ExtractAccessConditionsForColumn(ctx sessionctx.Context, conds []expression.Expression, col *expression.Column) []expression.Expression { checker := conditionChecker{ + ctx: ctx, checkerCol: col, length: types.UnspecifiedLength, optPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, @@ -1001,6 +1009,7 @@ func ExtractAccessConditionsForColumn(ctx sessionctx.Context, conds []expression // DetachCondsForColumn detaches access conditions for specified column from other filter conditions. 
func DetachCondsForColumn(sctx sessionctx.Context, conds []expression.Expression, col *expression.Column) (accessConditions, otherConditions []expression.Expression) { checker := &conditionChecker{ + ctx: sctx, checkerCol: col, length: types.UnspecifiedLength, optPrefixIndexSingleScan: sctx.GetSessionVars().OptPrefixIndexSingleScan, @@ -1024,6 +1033,7 @@ func MergeDNFItems4Col(ctx sessionctx.Context, dnfItems []expression.Expression) uniqueID := cols[0].UniqueID checker := &conditionChecker{ + ctx: ctx, checkerCol: cols[0], length: types.UnspecifiedLength, optPrefixIndexSingleScan: ctx.GetSessionVars().OptPrefixIndexSingleScan, diff --git a/util/ranger/points.go b/util/ranger/points.go index e5061caa43446..27a8b85317222 100644 --- a/util/ranger/points.go +++ b/util/ranger/points.go @@ -23,12 +23,15 @@ import ( "github.com/pingcap/tidb/errno" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/collate" "github.com/pingcap/tidb/util/dbterror" + "github.com/pingcap/tidb/util/hack" ) // Error instances. @@ -133,6 +136,50 @@ func rangePointEqualValueLess(a, b *point) bool { return a.excl && !b.excl } +func pointsConvertToSortKey(sctx sessionctx.Context, inputPs []*point, newTp *types.FieldType) ([]*point, error) { + // Only handle normal string type here. + // Currently, set won't be pushed down and it shouldn't reach here in theory. + // For enum, we have separate logic for it, like handleEnumFromBinOp(). For now, it only supports point range, + // intervals are not supported. So we also don't need to handle enum here. 
+ if newTp.EvalType() != types.ETString || + newTp.GetType() == mysql.TypeEnum || + newTp.GetType() == mysql.TypeSet { + return inputPs, nil + } + ps := make([]*point, 0, len(inputPs)) + for _, p := range inputPs { + np, err := pointConvertToSortKey(sctx, p, newTp, true) + if err != nil { + return nil, err + } + ps = append(ps, np) + } + return ps, nil +} + +func pointConvertToSortKey( + sctx sessionctx.Context, + inputP *point, + newTp *types.FieldType, + trimTrailingSpace bool, +) (*point, error) { + p, err := convertPoint(sctx, inputP, newTp) + if err != nil { + return nil, err + } + if p.value.Kind() != types.KindString || newTp.GetCollate() == charset.CollationBin || !collate.NewCollationEnabled() { + return p, nil + } + sortKey := p.value.GetBytes() + if !trimTrailingSpace { + sortKey = collate.GetCollator(newTp.GetCollate()).KeyWithoutTrimRightSpace(string(hack.String(sortKey))) + } else { + sortKey = collate.GetCollator(newTp.GetCollate()).Key(string(hack.String(sortKey))) + } + + return &point{value: types.NewBytesDatum(sortKey), excl: p.excl, start: p.start}, nil +} + func (r *pointSorter) Swap(i, j int) { r.points[i], r.points[j] = r.points[j], r.points[i] } @@ -181,16 +228,28 @@ func NullRange() Ranges { // builder is the range builder struct. type builder struct { - err error - sc *stmtctx.StatementContext + err error + sctx sessionctx.Context } -func (r *builder) build(expr expression.Expression, collator collate.Collator) []*point { +// build converts Expression on one column into point, which can be further built into Range. +// If the input prefixLen is not types.UnspecifiedLength, it means it's for a prefix column in a prefix index. In such +// cases, we should cut the prefix and adjust the exclusiveness. Ref: cutPrefixForPoints(). +// convertToSortKey indicates whether the string values should be converted to sort key. +// Converting to sort key can make `like` function be built into Range for new collation column. 
But we can't restore +// the original value from the sort key, so the usage of the result may be limited, like when you need to restore the +// result points back to Expression. +func (r *builder) build( + expr expression.Expression, + newTp *types.FieldType, + prefixLen int, + convertToSortKey bool, +) []*point { switch x := expr.(type) { case *expression.Column: return r.buildFromColumn() case *expression.ScalarFunction: - return r.buildFromScalarFunc(x, collator) + return r.buildFromScalarFunc(x, newTp, prefixLen, convertToSortKey) case *expression.Constant: return r.buildFromConstant(x) } @@ -208,7 +267,7 @@ func (r *builder) buildFromConstant(expr *expression.Constant) []*point { return nil } - val, err := dt.ToBool(r.sc) + val, err := dt.ToBool(r.sctx.GetSessionVars().StmtCtx) if err != nil { r.err = err return nil @@ -231,7 +290,12 @@ func (*builder) buildFromColumn() []*point { return []*point{startPoint1, endPoint1, startPoint2, endPoint2} } -func (r *builder) buildFromBinOp(expr *expression.ScalarFunction) []*point { +func (r *builder) buildFromBinOp( + expr *expression.ScalarFunction, + newTp *types.FieldType, + prefixLen int, + convertToSortKey bool, +) []*point { // This has been checked that the binary operation is comparison operation, and one of // the operand is column name expression. var ( @@ -253,11 +317,11 @@ func (r *builder) buildFromBinOp(expr *expression.ScalarFunction) []*point { // If the original value is adjusted, we need to change the condition. // For example, col < 2156. Since the max year is 2155, 2156 is changed to 2155. // col < 2155 is wrong. It should be col <= 2155. - preValue, err1 := value.ToInt64(r.sc) + preValue, err1 := value.ToInt64(r.sctx.GetSessionVars().StmtCtx) if err1 != nil { return err1 } - *value, err = value.ConvertToMysqlYear(r.sc, col.RetType) + *value, err = value.ConvertToMysqlYear(r.sctx.GetSessionVars().StmtCtx, col.RetType) if errors.ErrorEqual(err, types.ErrWarnDataOutOfRange) { // Keep err for EQ and NE. 
switch *op { @@ -334,43 +398,53 @@ func (r *builder) buildFromBinOp(expr *expression.ScalarFunction) []*point { } if ft.GetType() == mysql.TypeEnum && ft.EvalType() == types.ETString { - return handleEnumFromBinOp(r.sc, ft, value, op) + return handleEnumFromBinOp(r.sctx.GetSessionVars().StmtCtx, ft, value, op) } + var res []*point switch op { case ast.NullEQ: if value.IsNull() { - return []*point{{start: true}, {}} // [null, null] + res = []*point{{start: true}, {}} // [null, null] + break } fallthrough case ast.EQ: startPoint := &point{value: value, start: true} endPoint := &point{value: value} - return []*point{startPoint, endPoint} + res = []*point{startPoint, endPoint} case ast.NE: startPoint1 := &point{value: types.MinNotNullDatum(), start: true} endPoint1 := &point{value: value, excl: true} startPoint2 := &point{value: value, start: true, excl: true} endPoint2 := &point{value: types.MaxValueDatum()} - return []*point{startPoint1, endPoint1, startPoint2, endPoint2} + res = []*point{startPoint1, endPoint1, startPoint2, endPoint2} case ast.LT: startPoint := &point{value: types.MinNotNullDatum(), start: true} endPoint := &point{value: value, excl: true} - return []*point{startPoint, endPoint} + res = []*point{startPoint, endPoint} case ast.LE: startPoint := &point{value: types.MinNotNullDatum(), start: true} endPoint := &point{value: value} - return []*point{startPoint, endPoint} + res = []*point{startPoint, endPoint} case ast.GT: startPoint := &point{value: value, start: true, excl: true} endPoint := &point{value: types.MaxValueDatum()} - return []*point{startPoint, endPoint} + res = []*point{startPoint, endPoint} case ast.GE: startPoint := &point{value: value, start: true} endPoint := &point{value: types.MaxValueDatum()} - return []*point{startPoint, endPoint} + res = []*point{startPoint, endPoint} } - return nil + cutPrefixForPoints(res, prefixLen, ft) + if convertToSortKey { + res, err = pointsConvertToSortKey(r.sctx, res, newTp) + if err != nil { + r.err = 
err + return getFullRange() + } + } + return res } // handleUnsignedCol handles the case when unsigned column meets negative value. @@ -552,11 +626,17 @@ func (*builder) buildFromIsFalse(_ *expression.ScalarFunction, isNot int) []*poi return []*point{startPoint, endPoint} } -func (r *builder) buildFromIn(expr *expression.ScalarFunction) ([]*point, bool) { +func (r *builder) buildFromIn( + expr *expression.ScalarFunction, + newTp *types.FieldType, + prefixLen int, + convertToSortKey bool, +) ([]*point, bool) { list := expr.GetArgs()[1:] rangePoints := make([]*point, 0, len(list)*2) hasNull := false - colCollate := expr.GetArgs()[0].GetType().GetCollate() + ft := expr.GetArgs()[0].GetType() + colCollate := ft.GetCollate() for _, e := range list { v, ok := e.(*expression.Constant) if !ok { @@ -584,7 +664,7 @@ func (r *builder) buildFromIn(expr *expression.ScalarFunction) ([]*point, bool) err = parseErr } default: - dt, err = dt.ConvertTo(r.sc, expr.GetArgs()[0].GetType()) + dt, err = dt.ConvertTo(r.sctx.GetSessionVars().StmtCtx, expr.GetArgs()[0].GetType()) } if err != nil { @@ -593,7 +673,7 @@ func (r *builder) buildFromIn(expr *expression.ScalarFunction) ([]*point, bool) } } if expr.GetArgs()[0].GetType().GetType() == mysql.TypeYear { - dt, err = dt.ConvertToMysqlYear(r.sc, expr.GetArgs()[0].GetType()) + dt, err = dt.ConvertToMysqlYear(r.sctx.GetSessionVars().StmtCtx, expr.GetArgs()[0].GetType()) if err != nil { // in (..., an impossible value (not valid year), ...), the range is empty, so skip it. 
continue @@ -609,7 +689,7 @@ func (r *builder) buildFromIn(expr *expression.ScalarFunction) ([]*point, bool) endPoint := &point{value: endValue} rangePoints = append(rangePoints, startPoint, endPoint) } - sorter := pointSorter{points: rangePoints, sc: r.sc, collator: collate.GetCollator(colCollate)} + sorter := pointSorter{points: rangePoints, sc: r.sctx.GetSessionVars().StmtCtx, collator: collate.GetCollator(colCollate)} sort.Sort(&sorter) if sorter.err != nil { r.err = sorter.err @@ -628,10 +708,25 @@ func (r *builder) buildFromIn(expr *expression.ScalarFunction) ([]*point, bool) if curPos > 0 { curPos++ } - return rangePoints[:curPos], hasNull + rangePoints = rangePoints[:curPos] + cutPrefixForPoints(rangePoints, prefixLen, ft) + var err error + if convertToSortKey { + rangePoints, err = pointsConvertToSortKey(r.sctx, rangePoints, newTp) + if err != nil { + r.err = err + return getFullRange(), false + } + } + return rangePoints, hasNull } -func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*point { +func (r *builder) newBuildFromPatternLike( + expr *expression.ScalarFunction, + newTp *types.FieldType, + prefixLen int, + convertToSortKey bool, +) []*point { _, collation := expr.CharsetAndCollation() if !collate.CompatibleCollate(expr.GetArgs()[0].GetType().GetCollate(), collation) { return getFullRange() @@ -647,10 +742,19 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po r.err = errors.Trace(err) return getFullRange() } + // non-exceptional return case 1: empty pattern if pattern == "" { startPoint := &point{value: types.NewStringDatum(""), start: true} endPoint := &point{value: types.NewStringDatum("")} - return []*point{startPoint, endPoint} + res := []*point{startPoint, endPoint} + if convertToSortKey { + res, err = pointsConvertToSortKey(r.sctx, res, newTp) + if err != nil { + r.err = err + return getFullRange() + } + } + return res } lowValue := make([]byte, 0, len(pattern)) edt, err := 
expr.GetArgs()[2].(*expression.Constant).Eval(chunk.Row{}) @@ -677,36 +781,86 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po break } else if pattern[i] == '_' { // Get the prefix, but exclude the prefix. - // e.g., "abc_x", the start point exclude "abc", - // because the string length is more than 3. - exclude = true + // e.g., "abc_x", the start point excludes "abc" because the string length is more than 3. + // + // However, like the similar check in (*conditionChecker).checkLikeFunc(), in tidb's implementation, for + // PAD SPACE collations, the trailing spaces are removed in the index key. So we are unable to distinguish + // 'xxx' from 'xxx ' by a single index range scan. If we exclude the start point for PAD SPACE collation, + // we will actually miss 'xxx ', which will cause wrong results. + if !isPadSpaceCollation(collation) { + exclude = true + } isExactMatch = false break } lowValue = append(lowValue, pattern[i]) } + // non-exceptional return case 2: no characters before the wildcard if len(lowValue) == 0 { return []*point{{value: types.MinNotNullDatum(), start: true}, {value: types.MaxValueDatum()}} } + // non-exceptional return case 3: pattern contains valid characters and doesn't contain the wildcard if isExactMatch { val := types.NewCollationStringDatum(string(lowValue), tpOfPattern.GetCollate()) - return []*point{{value: val, start: true}, {value: val}} - } - startPoint := &point{start: true, excl: exclude} - startPoint.value.SetBytesAsString(lowValue, tpOfPattern.GetCollate(), uint32(tpOfPattern.GetFlen())) - highValue := make([]byte, len(lowValue)) - copy(highValue, lowValue) - endPoint := &point{excl: true} - for i := len(highValue) - 1; i >= 0; i-- { + startPoint := &point{value: val, start: true} + endPoint := &point{value: val} + res := []*point{startPoint, endPoint} + cutPrefixForPoints(res, prefixLen, tpOfPattern) + if convertToSortKey { + res, err = pointsConvertToSortKey(r.sctx, res, newTp) + if err != 
nil { + r.err = err + return getFullRange() + } + } + return res + } + + // non-exceptional return case 4: pattern contains valid characters and contains the wildcard + + // non-exceptional return case 4-1 + // If it's not a _bin or binary collation, and we don't convert the value to the sort key, we can't build + // a range for the wildcard. + if !convertToSortKey && + !collate.IsBinCollation(tpOfPattern.GetCollate()) { + return []*point{{value: types.MinNotNullDatum(), start: true}, {value: types.MaxValueDatum()}} + } + + // non-exceptional return case 4-2: build a range for the wildcard + // the end_key is sortKey(start_value) + 1 + originalStartPoint := &point{start: true, excl: exclude} + originalStartPoint.value.SetBytesAsString(lowValue, tpOfPattern.GetCollate(), uint32(tpOfPattern.GetFlen())) + cutPrefixForPoints([]*point{originalStartPoint}, prefixLen, tpOfPattern) + + // If we don't trim the trailing spaces, which means using KeyWithoutTrimRightSpace() instead of Key(), we can build + // a smaller range for better performance, e.g., LIKE ' %'. + // However, if it's a PAD SPACE collation, we must trim the trailing spaces for the start point to ensure the correctness. + // Because the trailing spaces are trimmed in the stored index key. For example, for LIKE 'abc %' on utf8mb4_bin + // column, the start key should be 'abc' instead of 'abc ', but the end key can be 'abc!'. ( ' ' is 32 and '!' is 33 + // in ASCII) + shouldTrimTrailingSpace := isPadSpaceCollation(collation) + startPoint, err := pointConvertToSortKey(r.sctx, originalStartPoint, newTp, shouldTrimTrailingSpace) + if err != nil { + r.err = errors.Trace(err) + return getFullRange() + } + sortKeyPointWithoutTrim, err := pointConvertToSortKey(r.sctx, originalStartPoint, newTp, false) + if err != nil { + r.err = errors.Trace(err) + return getFullRange() + } + sortKeyWithoutTrim := append([]byte{}, sortKeyPointWithoutTrim.value.GetBytes()...)
+ endPoint := &point{value: types.MaxValueDatum(), excl: true} + for i := len(sortKeyWithoutTrim) - 1; i >= 0; i-- { // Make the end point value more than the start point value, // and the length of the end point value is the same as the length of the start point value. // e.g., the start point value is "abc", so the end point value is "abd". - highValue[i]++ - if highValue[i] != 0 { - endPoint.value.SetBytesAsString(highValue, tpOfPattern.GetCollate(), uint32(tpOfPattern.GetFlen())) + sortKeyWithoutTrim[i]++ + if sortKeyWithoutTrim[i] != 0 { + endPoint.value.SetBytes(sortKeyWithoutTrim) break } - // If highValue[i] is 255 and highValue[i]++ is 0, then the end point value is max value. + // If sortKeyWithoutTrim[i] is 255 and sortKeyWithoutTrim[i]++ is 0, then the end point value is max value. if i == 0 { endPoint.value = types.MaxValueDatum() } @@ -714,7 +868,20 @@ func (r *builder) newBuildFromPatternLike(expr *expression.ScalarFunction) []*po return []*point{startPoint, endPoint} } -func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point { +// isPadSpaceCollation returns whether the collation is a PAD SPACE collation. +// Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple +// collation != binary check here. We may also move it to collation related packages when NO PAD collations are +// implemented in the future. 
+func isPadSpaceCollation(collation string) bool { + return collation != charset.CollationBin +} + +func (r *builder) buildFromNot( + expr *expression.ScalarFunction, + newTp *types.FieldType, + prefixLen int, + convertToSortKey bool, +) []*point { switch n := expr.FuncName.L; n { case ast.IsTruthWithoutNull: return r.buildFromIsTrue(expr, 1, false) @@ -727,7 +894,14 @@ func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point { isUnsignedIntCol bool nonNegativePos int ) - rangePoints, hasNull := r.buildFromIn(expr) + // Note that we must handle the cutting prefix and converting to sort key in buildFromNot, because if we cut the + // prefix inside buildFromIn(), the inversion logic here would make an incomplete and wrong range. + // For example, for index col(1), col NOT IN ('aaa', 'bbb'), if we cut the prefix in buildFromIn(), we would get + // ['a', 'a'], ['b', 'b'] from there. Then, after the inversion in this function, we would get ['', 'a'), ('a', 'b'), ('b', +inf] + // as the result. This is wrong because data like 'ab' would be missed. Actually we are unable to build a range + // for this case. + // So we must cut the prefix in this function, therefore converting to sort key must also be done here. + rangePoints, hasNull := r.buildFromIn(expr, newTp, types.UnspecifiedLength, false) if hasNull { return nil } @@ -753,6 +927,15 @@ func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point { // Append the interval (last element, max value]. retRangePoints = append(retRangePoints, &point{value: previousValue, start: true, excl: true}) retRangePoints = append(retRangePoints, &point{value: types.MaxValueDatum()}) + cutPrefixForPoints(retRangePoints, prefixLen, expr.GetArgs()[0].GetType()) + if convertToSortKey { + var err error + retRangePoints, err = pointsConvertToSortKey(r.sctx, retRangePoints, newTp) + if err != nil { + r.err = err + return getFullRange() + } + } return retRangePoints case ast.Like: // Pattern not like is not supported.
@@ -769,14 +952,27 @@ func (r *builder) buildFromNot(expr *expression.ScalarFunction) []*point { return getFullRange() } -func (r *builder) buildFromScalarFunc(expr *expression.ScalarFunction, collator collate.Collator) []*point { +func (r *builder) buildFromScalarFunc( + expr *expression.ScalarFunction, + newTp *types.FieldType, + prefixLen int, + convertToSortKey bool, +) []*point { switch op := expr.FuncName.L; op { case ast.GE, ast.GT, ast.LT, ast.LE, ast.EQ, ast.NE, ast.NullEQ: - return r.buildFromBinOp(expr) + return r.buildFromBinOp(expr, newTp, prefixLen, convertToSortKey) case ast.LogicAnd: - return r.intersection(r.build(expr.GetArgs()[0], collator), r.build(expr.GetArgs()[1], collator), collator) + collator := collate.GetCollator(newTp.GetCollate()) + if convertToSortKey { + collator = collate.GetCollator(charset.CollationBin) + } + return r.intersection(r.build(expr.GetArgs()[0], newTp, prefixLen, convertToSortKey), r.build(expr.GetArgs()[1], newTp, prefixLen, convertToSortKey), collator) case ast.LogicOr: - return r.union(r.build(expr.GetArgs()[0], collator), r.build(expr.GetArgs()[1], collator), collator) + collator := collate.GetCollator(newTp.GetCollate()) + if convertToSortKey { + collator = collate.GetCollator(charset.CollationBin) + } + return r.union(r.build(expr.GetArgs()[0], newTp, prefixLen, convertToSortKey), r.build(expr.GetArgs()[1], newTp, prefixLen, convertToSortKey), collator) case ast.IsTruthWithoutNull: return r.buildFromIsTrue(expr, 0, false) case ast.IsTruthWithNull: @@ -784,25 +980,31 @@ func (r *builder) buildFromScalarFunc(expr *expression.ScalarFunction, collator case ast.IsFalsity: return r.buildFromIsFalse(expr, 0) case ast.In: - retPoints, _ := r.buildFromIn(expr) + retPoints, _ := r.buildFromIn(expr, newTp, prefixLen, convertToSortKey) return retPoints case ast.Like: - return r.newBuildFromPatternLike(expr) + return r.newBuildFromPatternLike(expr, newTp, prefixLen, convertToSortKey) case ast.IsNull: startPoint := 
&point{start: true} endPoint := &point{} return []*point{startPoint, endPoint} case ast.UnaryNot: - return r.buildFromNot(expr.GetArgs()[0].(*expression.ScalarFunction)) + return r.buildFromNot(expr.GetArgs()[0].(*expression.ScalarFunction), newTp, prefixLen, convertToSortKey) } return nil } +// We need an input collator because our (*Datum).Compare(), which is used in this method, needs an explicit collator +// input to handle comparison for string and bytes. +// Note that if the points are converted to sort key, the collator should be set to charset.CollationBin. func (r *builder) intersection(a, b []*point, collator collate.Collator) []*point { return r.merge(a, b, false, collator) } +// We need an input collator because our (*Datum).Compare(), which is used in this method, needs an explicit collator +// input to handle comparison for string and bytes. +// Note that if the points are converted to sort key, the collator should be set to charset.CollationBin. func (r *builder) union(a, b []*point, collator collate.Collator) []*point { return r.merge(a, b, true, collator) } @@ -811,7 +1013,7 @@ func (r *builder) mergeSorted(a, b []*point, collator collate.Collator) []*point ret := make([]*point, 0, len(a)+len(b)) i, j := 0, 0 for i < len(a) && j < len(b) { - less, err := rangePointLess(r.sc, a[i], b[j], collator) + less, err := rangePointLess(r.sctx.GetSessionVars().StmtCtx, a[i], b[j], collator) if err != nil { r.err = err return nil diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index ad83f68ebd258..4d402ae66d6f0 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -58,13 +58,13 @@ func validInterval(sctx sessionctx.Context, low, high *point) (bool, error) { // convertPoints does some preprocessing on rangePoints to make them ready to build ranges. Preprocessing includes converting // points to the specified type, validating intervals and skipping impossible intervals. 
-func convertPoints(sctx sessionctx.Context, rangePoints []*point, tp *types.FieldType, skipNull bool, tableRange bool) ([]*point, error) { +func convertPoints(sctx sessionctx.Context, rangePoints []*point, newTp *types.FieldType, skipNull bool, tableRange bool) ([]*point, error) { i := 0 numPoints := len(rangePoints) var minValueDatum, maxValueDatum types.Datum if tableRange { // Currently, table's kv range cannot accept encoded value of MaxValueDatum. we need to convert it. - isUnsigned := mysql.HasUnsignedFlag(tp.GetFlag()) + isUnsigned := mysql.HasUnsignedFlag(newTp.GetFlag()) if isUnsigned { minValueDatum.SetUint64(0) maxValueDatum.SetUint64(math.MaxUint64) @@ -74,7 +74,7 @@ func convertPoints(sctx sessionctx.Context, rangePoints []*point, tp *types.Fiel } } for j := 0; j < numPoints; j += 2 { - startPoint, err := convertPoint(sctx, rangePoints[j], tp) + startPoint, err := convertPoint(sctx, rangePoints[j], newTp) if err != nil { return nil, errors.Trace(err) } @@ -86,7 +86,7 @@ func convertPoints(sctx sessionctx.Context, rangePoints []*point, tp *types.Fiel startPoint.value = minValueDatum } } - endPoint, err := convertPoint(sctx, rangePoints[j+1], tp) + endPoint, err := convertPoint(sctx, rangePoints[j+1], newTp) if err != nil { return nil, errors.Trace(err) } @@ -122,15 +122,15 @@ func estimateMemUsageForPoints2Ranges(rangePoints []*point) int64 { // Only one column is built there. If there're multiple columns, use appendPoints2Ranges. // rangeMaxSize is the max memory limit for ranges. O indicates no memory limit. // If the second return value is true, it means that the estimated memory usage of ranges exceeds rangeMaxSize and it falls back to full range. 
-func points2Ranges(sctx sessionctx.Context, rangePoints []*point, tp *types.FieldType, rangeMaxSize int64) (Ranges, bool, error) { - convertedPoints, err := convertPoints(sctx, rangePoints, tp, mysql.HasNotNullFlag(tp.GetFlag()), false) +func points2Ranges(sctx sessionctx.Context, rangePoints []*point, newTp *types.FieldType, rangeMaxSize int64) (Ranges, bool, error) { + convertedPoints, err := convertPoints(sctx, rangePoints, newTp, mysql.HasNotNullFlag(newTp.GetFlag()), false) if err != nil { return nil, false, errors.Trace(err) } // Estimate whether rangeMaxSize will be exceeded first before converting points to ranges. if rangeMaxSize > 0 && estimateMemUsageForPoints2Ranges(convertedPoints) > rangeMaxSize { var fullRange Ranges - if mysql.HasNotNullFlag(tp.GetFlag()) { + if mysql.HasNotNullFlag(newTp.GetFlag()) { fullRange = FullNotNullRange() } else { fullRange = FullRange() @@ -145,46 +145,46 @@ func points2Ranges(sctx sessionctx.Context, rangePoints []*point, tp *types.Fiel LowExclude: startPoint.excl, HighVal: []types.Datum{endPoint.value}, HighExclude: endPoint.excl, - Collators: []collate.Collator{collate.GetCollator(tp.GetCollate())}, + Collators: []collate.Collator{collate.GetCollator(newTp.GetCollate())}, } ranges = append(ranges, ran) } return ranges, false, nil } -func convertPoint(sctx sessionctx.Context, point *point, tp *types.FieldType) (*point, error) { +func convertPoint(sctx sessionctx.Context, point *point, newTp *types.FieldType) (*point, error) { sc := sctx.GetSessionVars().StmtCtx switch point.value.Kind() { case types.KindMaxValue, types.KindMinNotNull: return point, nil } - casted, err := point.value.ConvertTo(sc, tp) + casted, err := point.value.ConvertTo(sc, newTp) if err != nil { if sctx.GetSessionVars().StmtCtx.InPreparedPlanBuilding { // skip plan cache in this case for safety. 
sctx.GetSessionVars().StmtCtx.SetSkipPlanCache(errors.Errorf("%s when converting %v", err.Error(), point.value)) } //revive:disable:empty-block - if tp.GetType() == mysql.TypeYear && terror.ErrorEqual(err, types.ErrWarnDataOutOfRange) { + if newTp.GetType() == mysql.TypeYear && terror.ErrorEqual(err, types.ErrWarnDataOutOfRange) { // see issue #20101: overflow when converting integer to year - } else if tp.GetType() == mysql.TypeBit && terror.ErrorEqual(err, types.ErrDataTooLong) { + } else if newTp.GetType() == mysql.TypeBit && terror.ErrorEqual(err, types.ErrDataTooLong) { // see issue #19067: we should ignore the types.ErrDataTooLong when we convert value to TypeBit value - } else if tp.GetType() == mysql.TypeNewDecimal && terror.ErrorEqual(err, types.ErrOverflow) { + } else if newTp.GetType() == mysql.TypeNewDecimal && terror.ErrorEqual(err, types.ErrOverflow) { // Ignore the types.ErrOverflow when we convert TypeNewDecimal values. // A trimmed valid boundary point value would be returned then. Accordingly, the `excl` of the point // would be adjusted. Impossible ranges would be skipped by the `validInterval` call later. - } else if point.value.Kind() == types.KindMysqlTime && tp.GetType() == mysql.TypeTimestamp && terror.ErrorEqual(err, types.ErrWrongValue) { + } else if point.value.Kind() == types.KindMysqlTime && newTp.GetType() == mysql.TypeTimestamp && terror.ErrorEqual(err, types.ErrWrongValue) { // See issue #28424: query failed after add index // Ignore conversion from Date[Time] to Timestamp since it must be either out of range or impossible date, which will not match a point select - } else if tp.GetType() == mysql.TypeEnum && terror.ErrorEqual(err, types.ErrTruncated) { + } else if newTp.GetType() == mysql.TypeEnum && terror.ErrorEqual(err, types.ErrTruncated) { // Ignore the types.ErrorTruncated when we convert TypeEnum values. // We should cover Enum upper overflow, and convert to the biggest value. 
if point.value.GetInt64() > 0 { - upperEnum, err := types.ParseEnumValue(tp.GetElems(), uint64(len(tp.GetElems()))) + upperEnum, err := types.ParseEnumValue(newTp.GetElems(), uint64(len(newTp.GetElems()))) if err != nil { return nil, err } - casted.SetMysqlEnum(upperEnum, tp.GetCollate()) + casted.SetMysqlEnum(upperEnum, newTp.GetCollate()) } } else if terror.ErrorEqual(err, charset.ErrInvalidCharacterString) { // The invalid string can be produced by changing datum's underlying bytes directly. @@ -196,7 +196,7 @@ func convertPoint(sctx sessionctx.Context, point *point, tp *types.FieldType) (* } //revive:enable:empty-block } - valCmpCasted, err := point.value.Compare(sc, &casted, collate.GetCollator(tp.GetCollate())) + valCmpCasted, err := point.value.Compare(sc, &casted, collate.GetCollator(newTp.GetCollate())) if err != nil { return point, errors.Trace(err) } @@ -270,8 +270,8 @@ func estimateMemUsageForAppendPoints2Ranges(origin Ranges, rangePoints []*point) // If the second return value is true, it means that the estimated memory usage of ranges after appending points exceeds // rangeMaxSize and the function rejects appending points to ranges. 
func appendPoints2Ranges(sctx sessionctx.Context, origin Ranges, rangePoints []*point, - ft *types.FieldType, rangeMaxSize int64) (Ranges, bool, error) { - convertedPoints, err := convertPoints(sctx, rangePoints, ft, false, false) + newTp *types.FieldType, rangeMaxSize int64) (Ranges, bool, error) { + convertedPoints, err := convertPoints(sctx, rangePoints, newTp, false, false) if err != nil { return nil, false, errors.Trace(err) } @@ -285,7 +285,7 @@ func appendPoints2Ranges(sctx sessionctx.Context, origin Ranges, rangePoints []* if !oRange.IsPoint(sctx) { newIndexRanges = append(newIndexRanges, oRange) } else { - newRanges, err := appendPoints2IndexRange(oRange, convertedPoints, ft) + newRanges, err := appendPoints2IndexRange(oRange, convertedPoints, newTp) if err != nil { return nil, false, errors.Trace(err) } @@ -382,13 +382,13 @@ func AppendRanges2PointRanges(pointRanges Ranges, ranges Ranges, rangeMaxSize in // It will remove the nil and convert MinNotNull and MaxValue to MinInt64 or MinUint64 and MaxInt64 or MaxUint64. // rangeMaxSize is the max memory limit for ranges. O indicates no memory limit. // If the second return value is true, it means that the estimated memory usage of ranges exceeds rangeMaxSize and it falls back to full range. 
-func points2TableRanges(sctx sessionctx.Context, rangePoints []*point, tp *types.FieldType, rangeMaxSize int64) (Ranges, bool, error) { - convertedPoints, err := convertPoints(sctx, rangePoints, tp, true, true) +func points2TableRanges(sctx sessionctx.Context, rangePoints []*point, newTp *types.FieldType, rangeMaxSize int64) (Ranges, bool, error) { + convertedPoints, err := convertPoints(sctx, rangePoints, newTp, true, true) if err != nil { return nil, false, errors.Trace(err) } if rangeMaxSize > 0 && estimateMemUsageForPoints2Ranges(convertedPoints) > rangeMaxSize { - return FullIntRange(mysql.HasUnsignedFlag(tp.GetFlag())), true, nil + return FullIntRange(mysql.HasUnsignedFlag(newTp.GetFlag())), true, nil } ranges := make(Ranges, 0, len(convertedPoints)/2) for i := 0; i < len(convertedPoints); i += 2 { @@ -398,7 +398,7 @@ func points2TableRanges(sctx sessionctx.Context, rangePoints []*point, tp *types LowExclude: startPoint.excl, HighVal: []types.Datum{endPoint.value}, HighExclude: endPoint.excl, - Collators: []collate.Collator{collate.GetCollator(tp.GetCollate())}, + Collators: []collate.Collator{collate.GetCollator(newTp.GetCollate())}, } ranges = append(ranges, ran) } @@ -410,11 +410,12 @@ func points2TableRanges(sctx sessionctx.Context, rangePoints []*point, tp *types // The second return value is the conditions used to build ranges and the third return value is the remained conditions. 
func buildColumnRange(accessConditions []expression.Expression, sctx sessionctx.Context, tp *types.FieldType, tableRange bool, colLen int, rangeMaxSize int64) (Ranges, []expression.Expression, []expression.Expression, error) { - rb := builder{sc: sctx.GetSessionVars().StmtCtx} + rb := builder{sctx: sctx} + newTp := newFieldType(tp) rangePoints := getFullRange() for _, cond := range accessConditions { - collator := collate.GetCollator(tp.GetCollate()) - rangePoints = rb.intersection(rangePoints, rb.build(cond, collator), collator) + collator := collate.GetCollator(charset.CollationBin) + rangePoints = rb.intersection(rangePoints, rb.build(cond, newTp, colLen, true), collator) if rb.err != nil { return nil, nil, nil, errors.Trace(rb.err) } @@ -424,7 +425,7 @@ func buildColumnRange(accessConditions []expression.Expression, sctx sessionctx. rangeFallback bool err error ) - newTp := newFieldType(tp) + newTp = convertStringFTToBinaryCollate(newTp) if tableRange { ranges, rangeFallback, err = points2TableRanges(sctx, rangePoints, newTp, rangeMaxSize) } else { @@ -438,17 +439,6 @@ func buildColumnRange(accessConditions []expression.Expression, sctx sessionctx. return ranges, nil, accessConditions, nil } if colLen != types.UnspecifiedLength { - for _, ran := range ranges { - // If the length of the last column of LowVal is equal to the prefix length, LowExclude should be set false. - // For example, `col_varchar > 'xx'` should be converted to range [xx, +inf) when the prefix index length of - // `col_varchar` is 2. Otherwise we would miss values like 'xxx' if we execute (xx, +inf) index range scan. 
- if CutDatumByPrefixLen(&ran.LowVal[0], colLen, tp) || ReachPrefixLen(&ran.LowVal[0], colLen, tp) { - ran.LowExclude = false - } - if CutDatumByPrefixLen(&ran.HighVal[0], colLen, tp) { - ran.HighExclude = false - } - } ranges, err = UnionRanges(sctx, ranges, true) if err != nil { return nil, nil, nil, err @@ -484,7 +474,7 @@ func BuildColumnRange(conds []expression.Expression, sctx sessionctx.Context, tp func (d *rangeDetacher) buildRangeOnColsByCNFCond(newTp []*types.FieldType, eqAndInCount int, accessConds []expression.Expression) (Ranges, []expression.Expression, []expression.Expression, error) { - rb := builder{sc: d.sctx.GetSessionVars().StmtCtx} + rb := builder{sctx: d.sctx} var ( ranges Ranges rangeFallback bool @@ -492,14 +482,15 @@ func (d *rangeDetacher) buildRangeOnColsByCNFCond(newTp []*types.FieldType, eqAn ) for i := 0; i < eqAndInCount; i++ { // Build ranges for equal or in access conditions. - point := rb.build(accessConds[i], collate.GetCollator(newTp[i].GetCollate())) + point := rb.build(accessConds[i], newTp[i], d.lengths[i], d.convertToSortKey) if rb.err != nil { return nil, nil, nil, errors.Trace(rb.err) } + tmpNewTp := convertStringFTToBinaryCollate(newTp[i]) if i == 0 { - ranges, rangeFallback, err = points2Ranges(d.sctx, point, newTp[i], d.rangeMaxSize) + ranges, rangeFallback, err = points2Ranges(d.sctx, point, tmpNewTp, d.rangeMaxSize) } else { - ranges, rangeFallback, err = appendPoints2Ranges(d.sctx, ranges, point, newTp[i], d.rangeMaxSize) + ranges, rangeFallback, err = appendPoints2Ranges(d.sctx, ranges, point, tmpNewTp, d.rangeMaxSize) } if err != nil { return nil, nil, nil, errors.Trace(err) @@ -513,15 +504,26 @@ func (d *rangeDetacher) buildRangeOnColsByCNFCond(newTp []*types.FieldType, eqAn // Build rangePoints for non-equal access conditions. 
for i := eqAndInCount; i < len(accessConds); i++ { collator := collate.GetCollator(newTp[eqAndInCount].GetCollate()) - rangePoints = rb.intersection(rangePoints, rb.build(accessConds[i], collator), collator) + if d.convertToSortKey { + collator = collate.GetCollator(charset.CollationBin) + } + rangePoints = rb.intersection(rangePoints, rb.build(accessConds[i], newTp[eqAndInCount], d.lengths[eqAndInCount], d.convertToSortKey), collator) if rb.err != nil { return nil, nil, nil, errors.Trace(rb.err) } } + var tmpNewTp *types.FieldType + if eqAndInCount == 0 || eqAndInCount < len(accessConds) { + if d.convertToSortKey { + tmpNewTp = convertStringFTToBinaryCollate(newTp[eqAndInCount]) + } else { + tmpNewTp = newTp[eqAndInCount] + } + } if eqAndInCount == 0 { - ranges, rangeFallback, err = points2Ranges(d.sctx, rangePoints, newTp[0], d.rangeMaxSize) + ranges, rangeFallback, err = points2Ranges(d.sctx, rangePoints, tmpNewTp, d.rangeMaxSize) } else if eqAndInCount < len(accessConds) { - ranges, rangeFallback, err = appendPoints2Ranges(d.sctx, ranges, rangePoints, newTp[eqAndInCount], d.rangeMaxSize) + ranges, rangeFallback, err = appendPoints2Ranges(d.sctx, ranges, rangePoints, tmpNewTp, d.rangeMaxSize) } if err != nil { return nil, nil, nil, errors.Trace(err) @@ -533,6 +535,18 @@ func (d *rangeDetacher) buildRangeOnColsByCNFCond(newTp []*types.FieldType, eqAn return ranges, accessConds, nil, nil } +func convertStringFTToBinaryCollate(ft *types.FieldType) *types.FieldType { + if ft.EvalType() != types.ETString || + ft.GetType() == mysql.TypeEnum || + ft.GetType() == mysql.TypeSet { + return ft + } + newTp := ft.Clone() + newTp.SetCharset(charset.CharsetBin) + newTp.SetCollate(charset.CollationBin) + return newTp +} + // buildCNFIndexRange builds the range for index where the top layer is CNF. 
func (d *rangeDetacher) buildCNFIndexRange(newTp []*types.FieldType, eqAndInCount int, accessConds []expression.Expression) (Ranges, []expression.Expression, []expression.Expression, error) { @@ -543,11 +557,9 @@ func (d *rangeDetacher) buildCNFIndexRange(newTp []*types.FieldType, eqAndInCoun // Take prefix index into consideration. if hasPrefix(d.lengths) { - if fixPrefixColRange(ranges, d.lengths, newTp) { - ranges, err = UnionRanges(d.sctx, ranges, d.mergeConsecutive) - if err != nil { - return nil, nil, nil, errors.Trace(err) - } + ranges, err = UnionRanges(d.sctx, ranges, d.mergeConsecutive) + if err != nil { + return nil, nil, nil, errors.Trace(err) } } @@ -618,47 +630,28 @@ func hasPrefix(lengths []int) bool { return false } -// fixPrefixColRange checks whether the range of one column exceeds the length and needs to be cut. -// It specially handles the last column of each range point. If the last one need to be cut, it will -// change the exclude status of that point and return `true` to tell -// that we need do a range merging since that interval may have intersection. -// e.g. if the interval is (-inf -inf, a xxxxx), (a xxxxx, +inf +inf) and the length of the last column is 3, -// -// then we'll change it to (-inf -inf, a xxx], [a xxx, +inf +inf). You can see that this two interval intersect, -// so we need a merge operation. -// -// Q: only checking the last column to decide whether the endpoint's exclude status needs to be reset is enough? -// A: Yes, suppose that the interval is (-inf -inf, a xxxxx b) and only the second column needs to be cut. -// -// The result would be (-inf -inf, a xxx b) if the length of it is 3. Obviously we only need to care about the data -// whose the first two key is `a` and `xxx`. It read all data whose index value begins with `a` and `xxx` and the third -// value less than `b`, covering the values begin with `a` and `xxxxx` and the third value less than `b` perfectly. 
-// So in this case we don't need to reset its exclude status. The right endpoint case can be proved in the same way. -func fixPrefixColRange(ranges Ranges, lengths []int, tp []*types.FieldType) bool { - var hasCut bool - for _, ran := range ranges { - lowTail := len(ran.LowVal) - 1 - for i := 0; i < lowTail; i++ { - hasCut = CutDatumByPrefixLen(&ran.LowVal[i], lengths[i], tp[i]) || hasCut - } - lowCut := CutDatumByPrefixLen(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail]) - // If the length of the last column of LowVal is equal to the prefix length, LowExclude should be set false. - // For example, `col_varchar > 'xx'` should be converted to range [xx, +inf) when the prefix index length of - // `col_varchar` is 2. Otherwise we would miss values like 'xxx' if we execute (xx, +inf) index range scan. - if lowCut || ReachPrefixLen(&ran.LowVal[lowTail], lengths[lowTail], tp[lowTail]) { - ran.LowExclude = false - } - highTail := len(ran.HighVal) - 1 - for i := 0; i < highTail; i++ { - hasCut = CutDatumByPrefixLen(&ran.HighVal[i], lengths[i], tp[i]) || hasCut +// cutPrefixForPoints cuts the prefix of points according to the prefix length of the prefix index. +// It may modify the point.value and point.excl. The modification is in-place. +// This function doesn't require the start and end points to be paired in the input. +func cutPrefixForPoints(points []*point, length int, tp *types.FieldType) { + if length == types.UnspecifiedLength { + return + } + for _, p := range points { + if p == nil { + continue } - highCut := CutDatumByPrefixLen(&ran.HighVal[highTail], lengths[highTail], tp[highTail]) - if highCut { - ran.HighExclude = false + cut := CutDatumByPrefixLen(&p.value, length, tp) + // In two cases, we need to convert the exclusive point to an inclusive point. + // case 1: we actually cut the value to accommodate the prefix index. + if cut || + // case 2: the value is already equal to the prefix index. 
+ // For example, col_varchar > 'xx' should be converted to range [xx, +inf) when the prefix index length of + // `col_varchar` is 2. Otherwise, we would miss values like 'xxx' if we execute (xx, +inf) index range scan. + (p.start && ReachPrefixLen(&p.value, length, tp)) { + p.excl = false } - hasCut = hasCut || lowCut || highCut } - return hasCut } // CutDatumByPrefixLen cuts the datum according to the prefix length. @@ -701,8 +694,10 @@ func ReachPrefixLen(v *types.Datum, length int, tp *types.FieldType) bool { return false } -// We cannot use the FieldType of column directly. e.g. the column a is int32 and we have a > 1111111111111111111. +// In util/ranger, for each datum that is used in the Range, we will convert data type for them. +// But we cannot use the FieldType of column directly. e.g. the column a is int32 and we have a > 1111111111111111111. // Obviously the constant is bigger than MaxInt32, so we will get overflow error if we use the FieldType of column a. +// In util/ranger here, we usually use "newTp" to emphasize its difference from the original FieldType of the column. func newFieldType(tp *types.FieldType) *types.FieldType { switch tp.GetType() { // To avoid overflow error. 
diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index db54c134b6cfd..709aa58db4ef6 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -1416,7 +1416,7 @@ create table t( indexPos: 0, exprStr: `a LIKE 'abc%'`, accessConds: `[like(test.t.a, abc%, 92)]`, - filterConds: "[]", + filterConds: "[like(test.t.a, abc%, 92)]", resultStr: "[[\"abc\",\"abd\")]", }, { @@ -1424,20 +1424,20 @@ create table t( exprStr: "a LIKE 'abc_'", accessConds: "[like(test.t.a, abc_, 92)]", filterConds: "[like(test.t.a, abc_, 92)]", - resultStr: "[(\"abc\",\"abd\")]", + resultStr: "[[\"abc\",\"abd\")]", }, { indexPos: 0, exprStr: "a LIKE 'abc'", accessConds: "[like(test.t.a, abc, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, abc, 92)]", resultStr: "[[\"abc\",\"abc\"]]", }, { indexPos: 0, exprStr: `a LIKE "ab\_c"`, accessConds: "[like(test.t.a, ab\\_c, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, ab\\_c, 92)]", resultStr: "[[\"ab_c\",\"ab_c\"]]", }, { @@ -1451,21 +1451,21 @@ create table t( indexPos: 0, exprStr: `a LIKE '\%a'`, accessConds: "[like(test.t.a, \\%a, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, \\%a, 92)]", resultStr: `[["%a","%a"]]`, }, { indexPos: 0, exprStr: `a LIKE "\\"`, accessConds: "[like(test.t.a, \\, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, \\, 92)]", resultStr: "[[\"\\\\\",\"\\\\\"]]", }, { indexPos: 0, exprStr: `a LIKE "\\\\a%"`, accessConds: `[like(test.t.a, \\a%, 92)]`, - filterConds: "[]", + filterConds: "[like(test.t.a, \\\\a%, 92)]", resultStr: "[[\"\\\\a\",\"\\\\b\")]", }, { @@ -1613,7 +1613,7 @@ create table t( exprStr: `e = "你好啊"`, accessConds: "[eq(test.t.e, 你好啊)]", filterConds: "[eq(test.t.e, 你好啊)]", - resultStr: "[[0xE4BD,0xE4BD]]", + resultStr: "[[\"\\xe4\\xbd\",\"\\xe4\\xbd\"]]", }, { indexPos: 2, @@ -1648,21 +1648,21 @@ create table t( exprStr: "f >= 'a' and f <= 'B'", accessConds: "[ge(test.t.f, a) le(test.t.f, B)]", filterConds: "[]", - resultStr: 
"[[\"a\",\"B\"]]", + resultStr: "[[\"\\x00A\",\"\\x00B\"]]", }, { indexPos: 4, exprStr: "f in ('a', 'B')", accessConds: "[in(test.t.f, a, B)]", filterConds: "[]", - resultStr: "[[\"a\",\"a\"] [\"B\",\"B\"]]", + resultStr: "[[\"\\x00A\",\"\\x00A\"] [\"\\x00B\",\"\\x00B\"]]", }, { indexPos: 4, exprStr: "f = 'a' and f = 'B' collate utf8mb4_bin", accessConds: "[eq(test.t.f, a)]", filterConds: "[eq(test.t.f, B)]", - resultStr: "[[\"a\",\"a\"]]", + resultStr: "[[\"\\x00A\",\"\\x00A\"]]", }, { indexPos: 4, @@ -1676,7 +1676,7 @@ create table t( exprStr: "d in ('aab', 'aac') and e = 'a'", accessConds: "[in(test.t.d, aab, aac) eq(test.t.e, a)]", filterConds: "[in(test.t.d, aab, aac)]", - resultStr: "[[\"aa\" 0x61,\"aa\" 0x61]]", + resultStr: "[[\"aa\" \"a\",\"aa\" \"a\"]]", }, { indexPos: 6, diff --git a/util/ranger/types.go b/util/ranger/types.go index 0024a8b039c18..c666175090763 100644 --- a/util/ranger/types.go +++ b/util/ranger/types.go @@ -269,7 +269,7 @@ func formatDatum(d types.Datum, isLeftSide bool) string { return "+inf" } case types.KindBytes: - return fmt.Sprintf("0x%X", d.GetValue()) + return fmt.Sprintf("%q", d.GetValue()) case types.KindString: return fmt.Sprintf("%q", d.GetValue()) case types.KindMysqlEnum, types.KindMysqlSet,