From 962a74c459ff08da49cb27ba4890db0a895a1420 Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Thu, 23 Nov 2023 17:58:30 +0800 Subject: [PATCH 1/3] fix --- .../testdata/plan_suite_out.json | 18 +++---- pkg/util/ranger/checker.go | 16 +++++- pkg/util/ranger/ranger_test.go | 12 ++--- .../explain_generate_column_substitute.result | 3 +- .../physicalplantest/physical_plan.result | 44 ++++++++-------- .../r/planner/core/plan.result | 50 +++++++++++-------- 6 files changed, 84 insertions(+), 59 deletions(-) diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json index 85743605cc0b7..234706233bd4e 100644 --- a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json +++ b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json @@ -2171,11 +2171,11 @@ }, { "SQL": "select a from t where c_str like ''", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"\",\"\"]]->Sel([like(test.t.c_str, , 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abc\"]]->Sel([like(test.t.c_str, abc, 92)]))->Projection" }, { "SQL": "select a from t where c_str not like 'abc'", @@ -2191,7 +2191,7 @@ }, { "SQL": "select a from t where c_str like 'abc%'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc\",\"abd\")]->Sel([like(test.t.c_str, abc%, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc_'", @@ -2203,23 +2203,23 @@ }, { "SQL": "select a from t where c_str like 'abc\\_' escape ''", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\_'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\\\_'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc_\"]]->Sel([like(test.t.c_str, abc\\_, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\_%'", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc\\_%, 92)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc=_%' escape '='", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"abc_\",\"abc`\")]->Sel([like(test.t.c_str, abc=_%, 61)]))->Projection" }, { "SQL": "select a from t where c_str like 'abc\\__'", @@ -2227,7 +2227,7 @@ }, { "SQL": "select a from t where c_str like 123", - "Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]])->Projection" + "Best": "IndexReader(Index(t.c_d_e_str)[[\"123\",\"123\"]]->Sel([like(test.t.c_str, 123, 92)]))->Projection" }, { "SQL": "select a from t where c = 1.9 and d > 3", diff --git a/pkg/util/ranger/checker.go b/pkg/util/ranger/checker.go index b279678459460..2b06e1815401b 100644 --- a/pkg/util/ranger/checker.go +++ b/pkg/util/ranger/checker.go @@ -17,6 +17,7 @@ package ranger import ( "github.com/pingcap/tidb/pkg/expression" "github.com/pingcap/tidb/pkg/parser/ast" + "github.com/pingcap/tidb/pkg/parser/charset" "github.com/pingcap/tidb/pkg/parser/mysql" "github.com/pingcap/tidb/pkg/sessionctx" "github.com/pingcap/tidb/pkg/types" @@ -168,11 +169,22 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA if err != nil { return false, true } + likeFuncReserve := !c.isFullLengthColumn() + + // Different from `=`, trailing spaces are always significant, and can't be ignored in `like`. + // In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are + // unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by + // the `like` function. Therefore, a Selection is needed to filter the data. + // Since all collations, except for binary, implemented in tidb are PAD SPACE collations, we use a simple + // collation != binary check here. + if collation != charset.CollationBin { + likeFuncReserve = true + } + if len(patternStr) == 0 { - return true, !c.isFullLengthColumn() + return true, likeFuncReserve } escape := byte(scalar.GetArgs()[2].(*expression.Constant).Value.GetInt64()) - likeFuncReserve := !c.isFullLengthColumn() for i := 0; i < len(patternStr); i++ { if patternStr[i] == escape { i++ diff --git a/pkg/util/ranger/ranger_test.go b/pkg/util/ranger/ranger_test.go index ce519eb053ce9..aeb2b8a1dc31b 100644 --- a/pkg/util/ranger/ranger_test.go +++ b/pkg/util/ranger/ranger_test.go @@ -1106,7 +1106,7 @@ create table t( indexPos: 0, exprStr: `a LIKE 'abc%'`, accessConds: `[like(test.t.a, abc%, 92)]`, - filterConds: "[]", + filterConds: "[like(test.t.a, abc%, 92)]", resultStr: "[[\"abc\",\"abd\")]", }, { @@ -1120,14 +1120,14 @@ create table t( indexPos: 0, exprStr: "a LIKE 'abc'", accessConds: "[like(test.t.a, abc, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, abc, 92)]", resultStr: "[[\"abc\",\"abc\"]]", }, { indexPos: 0, exprStr: `a LIKE "ab\_c"`, accessConds: "[like(test.t.a, ab\\_c, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, ab\\_c, 92)]", resultStr: "[[\"ab_c\",\"ab_c\"]]", }, { @@ -1141,21 +1141,21 @@ create table t( indexPos: 0, exprStr: `a LIKE '\%a'`, accessConds: "[like(test.t.a, \\%a, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, \\%a, 92)]", resultStr: `[["%a","%a"]]`, }, { indexPos: 0, exprStr: `a LIKE "\\"`, accessConds: "[like(test.t.a, \\, 92)]", - filterConds: "[]", + filterConds: "[like(test.t.a, \\, 92)]", resultStr: "[[\"\\\\\",\"\\\\\"]]", }, { indexPos: 0, exprStr: `a LIKE "\\\\a%"`, accessConds: `[like(test.t.a, \\a%, 92)]`, - filterConds: "[]", + filterConds: "[like(test.t.a, \\\\a%, 92)]", resultStr: "[[\"\\\\a\",\"\\\\b\")]", }, { diff --git a/tests/integrationtest/r/explain_generate_column_substitute.result b/tests/integrationtest/r/explain_generate_column_substitute.result index 0a1b4a035342c..b4f9fe097c3c6 100644 --- a/tests/integrationtest/r/explain_generate_column_substitute.result +++ b/tests/integrationtest/r/explain_generate_column_substitute.result @@ -396,7 +396,8 @@ id estRows task access object operator info StreamAgg 1.00 root funcs:count(Column#6)->Column#4 └─IndexReader 1.00 root index:StreamAgg └─StreamAgg 1.00 cop[tikv] funcs:count(1)->Column#6 - └─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo + └─Selection 250.00 cop[tikv] like(md5(cast(explain_generate_column_substitute.tbl1.s, var_string(20))), "02e74f10e0327ad868d138f2b4fdd6f%", 92) + └─IndexRangeScan 250.00 cop[tikv] table:tbl1, index:expression_index(md5(`s`)) range:["02e74f10e0327ad868d138f2b4fdd6f","02e74f10e0327ad868d138f2b4fdd6g"), keep order:false, stats:pseudo select count(*) from tbl1 use index() where md5(s) like '02e74f10e0327ad868d138f2b4fdd6f%'; count(*) 64 diff --git a/tests/integrationtest/r/planner/core/casetest/physicalplantest/physical_plan.result b/tests/integrationtest/r/planner/core/casetest/physicalplantest/physical_plan.result index 5db9c5101fc29..541e196db8f36 100644 --- a/tests/integrationtest/r/planner/core/casetest/physicalplantest/physical_plan.result +++ b/tests/integrationtest/r/planner/core/casetest/physicalplantest/physical_plan.result @@ -3308,21 +3308,21 @@ Projection 249.75 root planner__core__casetest__physicalplantest__physical_plan │ └─StreamAgg 249.75 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.tc.id)->Column#14 │ └─TopN 62.38 root planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1 │ └─IndexLookUp 62.38 root - │ ├─Selection(Build) 62.44 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name) + │ ├─Selection(Build) 62.38 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name), like(planner__core__casetest__physicalplantest__physical_plan.tc.name, "chad99%", 92) │ │ └─IndexRangeScan 62437.50 cop[tikv] table:tc, index:idx_tc_name(name) range:["chad99","chad9:"), keep order:false, stats:pseudo │ └─TopN(Probe) 62.38 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1 │ └─Selection 62.38 cop[tikv] not(isnull(planner__core__casetest__physicalplantest__physical_plan.tc.id)) - │ └─TableRowIDScan 62.44 cop[tikv] table:tc keep order:false, stats:pseudo + │ └─TableRowIDScan 62.38 cop[tikv] table:tc keep order:false, stats:pseudo └─Selection(Probe) 199.80 root gt(Column#19, 100) └─MaxOneRow 249.75 root └─StreamAgg 249.75 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.td.id)->Column#19 - └─Limit 62.38 root offset:0, count:1 - └─Projection 62.38 root planner__core__casetest__physicalplantest__physical_plan.td.id, planner__core__casetest__physicalplantest__physical_plan.td.name - └─IndexLookUp 62.38 root - ├─Selection(Build) 2495.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id) - │ └─IndexFullScan 2495002.50 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo - └─Selection(Probe) 62.38 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92) - └─TableRowIDScan 2495.00 cop[tikv] table:td keep order:false, stats:pseudo + └─TopN 62.38 root planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1 + └─IndexLookUp 62.38 root + ├─Selection(Build) 1560.94 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92) + │ └─IndexRangeScan 62437.50 cop[tikv] table:td, index:idx_tc_name(name) range:["chad999","chad99:"), keep order:false, stats:pseudo + └─TopN(Probe) 62.38 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1 + └─Selection 62.38 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id), not(isnull(planner__core__casetest__physicalplantest__physical_plan.td.id)) + └─TableRowIDScan 1560.94 cop[tikv] table:td keep order:false, stats:pseudo SELECT ta.NAME FROM ta WHERE EXISTS (select /*+ semi_join_rewrite() */ 1 from tb where ta.code = tb.code and tb.NAME LIKE 'chad9%') AND (select /*+ no_decorrelate() */ max(id) from tc where ta.name=tc.name and tc.name like 'chad99%') > 100 and (select /*+ no_decorrelate() */ max(id) from td where ta.id=td.id and td.name like 'chad999%') > 100; NAME show warnings; @@ -3335,29 +3335,31 @@ Projection 10000.00 root planner__core__casetest__physicalplantest__physical_pl │ ├─Apply(Build) 10000.00 root CARTESIAN semi join │ │ ├─TableReader(Build) 10000.00 root data:TableFullScan │ │ │ └─TableFullScan 10000.00 cop[tikv] table:ta keep order:false, stats:pseudo - │ │ └─TableReader(Probe) 2500.00 root data:Selection - │ │ └─Selection 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.code, planner__core__casetest__physicalplantest__physical_plan.tb.code), like(planner__core__casetest__physicalplantest__physical_plan.tb.name, "chad9%", 92) - │ │ └─TableFullScan 100000000.00 cop[tikv] table:tb keep order:false, stats:pseudo + │ │ └─IndexLookUp(Probe) 2500.00 root + │ │ ├─Selection(Build) 62500.00 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.tb.name, "chad9%", 92) + │ │ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tb, index:idx_tb_name(name) range:["chad9","chad:"), keep order:false, stats:pseudo + │ │ └─Selection(Probe) 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.code, planner__core__casetest__physicalplantest__physical_plan.tb.code) + │ │ └─TableRowIDScan 62500.00 cop[tikv] table:tb keep order:false, stats:pseudo │ └─Selection(Probe) 8000.00 root gt(Column#14, 100) │ └─MaxOneRow 10000.00 root │ └─StreamAgg 10000.00 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.tc.id)->Column#14 │ └─TopN 2497.50 root planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1 │ └─IndexLookUp 2497.50 root - │ ├─Selection(Build) 2500.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name) + │ ├─Selection(Build) 2497.50 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.name, planner__core__casetest__physicalplantest__physical_plan.tc.name), like(planner__core__casetest__physicalplantest__physical_plan.tc.name, "chad99%", 92) │ │ └─IndexRangeScan 2500000.00 cop[tikv] table:tc, index:idx_tc_name(name) range:["chad99","chad9:"), keep order:false, stats:pseudo │ └─TopN(Probe) 2497.50 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.tc.id:desc, offset:0, count:1 │ └─Selection 2497.50 cop[tikv] not(isnull(planner__core__casetest__physicalplantest__physical_plan.tc.id)) - │ └─TableRowIDScan 2500.00 cop[tikv] table:tc keep order:false, stats:pseudo + │ └─TableRowIDScan 2497.50 cop[tikv] table:tc keep order:false, stats:pseudo └─Selection(Probe) 8000.00 root gt(Column#19, 100) └─MaxOneRow 10000.00 root └─StreamAgg 10000.00 root funcs:max(planner__core__casetest__physicalplantest__physical_plan.td.id)->Column#19 - └─Limit 2497.50 root offset:0, count:1 - └─Projection 2497.50 root planner__core__casetest__physicalplantest__physical_plan.td.id, planner__core__casetest__physicalplantest__physical_plan.td.name - └─IndexLookUp 2497.50 root - ├─Selection(Build) 99900.00 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id) - │ └─IndexFullScan 99900000.00 cop[tikv] table:td, index:idx_tc_id(id) keep order:true, desc, stats:pseudo - └─Selection(Probe) 2497.50 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92) - └─TableRowIDScan 99900.00 cop[tikv] table:td keep order:false, stats:pseudo + └─TopN 2497.50 root planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1 + └─IndexLookUp 2497.50 root + ├─Selection(Build) 62500.00 cop[tikv] like(planner__core__casetest__physicalplantest__physical_plan.td.name, "chad999%", 92) + │ └─IndexRangeScan 2500000.00 cop[tikv] table:td, index:idx_tc_name(name) range:["chad999","chad99:"), keep order:false, stats:pseudo + └─TopN(Probe) 2497.50 cop[tikv] planner__core__casetest__physicalplantest__physical_plan.td.id:desc, offset:0, count:1 + └─Selection 2497.50 cop[tikv] eq(planner__core__casetest__physicalplantest__physical_plan.ta.id, planner__core__casetest__physicalplantest__physical_plan.td.id), not(isnull(planner__core__casetest__physicalplantest__physical_plan.td.id)) + └─TableRowIDScan 62500.00 cop[tikv] table:td keep order:false, stats:pseudo SELECT ta.NAME FROM ta WHERE EXISTS (select /*+ no_decorrelate() */ 1 from tb where ta.code = tb.code and tb.NAME LIKE 'chad9%') AND (select /*+ no_decorrelate() */ max(id) from tc where ta.name=tc.name and tc.name like 'chad99%') > 100 and (select /*+ no_decorrelate() */ max(id) from td where ta.id=td.id and td.name like 'chad999%') > 100; NAME show warnings; diff --git a/tests/integrationtest/r/planner/core/plan.result b/tests/integrationtest/r/planner/core/plan.result index 76bee6627810d..344e3c2796215 100644 --- a/tests/integrationtest/r/planner/core/plan.result +++ b/tests/integrationtest/r/planner/core/plan.result @@ -137,8 +137,9 @@ CREATE TABLE `t1` ( `a` varchar(10) DEFAULT NULL, `b` varchar(10) DEFAULT NULL explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -147,8 +148,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -157,8 +159,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -167,8 +170,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -177,8 +181,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -187,8 +192,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -197,8 +203,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -207,8 +214,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -217,8 +225,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b @@ -227,8 +236,9 @@ Projection 33.33 root planner__core__plan.t1.b explain format='brief' select * from t1 where concat(a, b) like "aadwa" and a = "a"; id estRows task access object operator info Projection 0.10 root planner__core__plan.t1.a, planner__core__plan.t1.b -└─IndexReader 0.10 root index:IndexRangeScan - └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo +└─IndexReader 0.10 root index:Selection + └─Selection 0.10 cop[tikv] like(concat(planner__core__plan.t1.a, planner__core__plan.t1.b), "aadwa", 92) + └─IndexRangeScan 0.10 cop[tikv] table:t1, index:idx2(a, concat(`a`, `b`), b) range:["a" "aadwa","a" "aadwa"], keep order:false, stats:pseudo explain format='brief' select b from t1 where concat(a, b) >= "aa" and a = "b"; id estRows task access object operator info Projection 33.33 root planner__core__plan.t1.b From c9f1753297e5024e10585f1e0799da804ca10c2b Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Thu, 23 Nov 2023 18:19:09 +0800 Subject: [PATCH 2/3] add test cases --- .../core/issuetest/planner_issue.result | 45 +++++++++++++++++++ .../planner/core/issuetest/planner_issue.test | 15 +++++++ 2 files changed, 60 insertions(+) diff --git a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result index 5d95672abacb7..9a5474ce1b188 100644 --- a/tests/integrationtest/r/planner/core/issuetest/planner_issue.result +++ b/tests/integrationtest/r/planner/core/issuetest/planner_issue.result @@ -180,3 +180,48 @@ LEFT JOIN tmp3 c3 ON c3.id = '1'; id id 1 1 1 1 +drop table if exists t1, t2; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '); +explain format = brief select *,length(a) from t1 where a like '测试 %'; +id estRows task access object operator info +Projection 250.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3 +└─UnionScan 250.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92) + └─IndexReader 250.00 root index:Selection + └─Selection 250.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试 %", 92) + └─IndexRangeScan 250.00 cop[tikv] table:t1, index:ia(a) range:["测试 ","测试!"), keep order:false, stats:pseudo +explain format = brief select *,length(a) from t1 where a like '测试'; +id estRows task access object operator info +Projection 10.00 root planner__core__issuetest__planner_issue.t1.a, length(planner__core__issuetest__planner_issue.t1.a)->Column#3 +└─UnionScan 10.00 root like(planner__core__issuetest__planner_issue.t1.a, "测试", 92) + └─IndexReader 10.00 root index:Selection + └─Selection 10.00 cop[tikv] like(planner__core__issuetest__planner_issue.t1.a, "测试", 92) + └─IndexRangeScan 10.00 cop[tikv] table:t1, index:ia(a) range:["测试","测试"], keep order:false, stats:pseudo +select *,length(a) from t1 where a like '测试 %'; +a length(a) +测试 8 +select *,length(a) from t1 where a like '测试'; +a length(a) +测试 6 +create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a)); +insert into t2 value('测试'),('测试 '); +explain format = brief select *,length(a) from t2 where a like '测试 %'; +id estRows task access object operator info +Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3 +└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92) + └─TableReader 8000.00 root data:Selection + └─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试 %", 92) + └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo +explain format = brief select *,length(a) from t2 where a like '测试'; +id estRows task access object operator info +Projection 8000.00 root planner__core__issuetest__planner_issue.t2.a, length(to_binary(planner__core__issuetest__planner_issue.t2.a))->Column#3 +└─UnionScan 8000.00 root like(planner__core__issuetest__planner_issue.t2.a, "测试", 92) + └─TableReader 8000.00 root data:Selection + └─Selection 8000.00 cop[tikv] like(planner__core__issuetest__planner_issue.t2.a, "测试", 92) + └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo +select *,length(a) from t2 where a like '测试 %'; +a length(a) +测试 6 +select *,length(a) from t2 where a like '测试'; +a length(a) +测试 4 diff --git a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test index 1b58c7c5046c7..372e9eb8a67a6 100644 --- a/tests/integrationtest/t/planner/core/issuetest/planner_issue.test +++ b/tests/integrationtest/t/planner/core/issuetest/planner_issue.test @@ -136,3 +136,18 @@ FROM t2 db LEFT JOIN tmp3 c2 ON c2.id = '1' LEFT JOIN tmp3 c3 ON c3.id = '1'; + +# https://github.com/pingcap/tidb/issues/48821 +drop table if exists t1, t2; +create table t1(a varchar(20) collate utf8mb4_bin, index ia(a)); +insert into t1 value('测试'),('测试 '); +explain format = brief select *,length(a) from t1 where a like '测试 %'; +explain format = brief select *,length(a) from t1 where a like '测试'; +select *,length(a) from t1 where a like '测试 %'; +select *,length(a) from t1 where a like '测试'; +create table t2(a varchar(20) collate gbk_chinese_ci, index ia(a)); +insert into t2 value('测试'),('测试 '); +explain format = brief select *,length(a) from t2 where a like '测试 %'; +explain format = brief select *,length(a) from t2 where a like '测试'; +select *,length(a) from t2 where a like '测试 %'; +select *,length(a) from t2 where a like '测试'; From e5cc5429785c73467c1a54647c979665db39455e Mon Sep 17 00:00:00 2001 From: time-and-fate <25057648+time-and-fate@users.noreply.github.com> Date: Fri, 24 Nov 2023 14:38:27 +0800 Subject: [PATCH 3/3] update comments --- pkg/util/ranger/checker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/ranger/checker.go b/pkg/util/ranger/checker.go index 2b06e1815401b..2c7812b6cdb3f 100644 --- a/pkg/util/ranger/checker.go +++ b/pkg/util/ranger/checker.go @@ -175,7 +175,7 @@ func (c *conditionChecker) checkLikeFunc(scalar *expression.ScalarFunction) (isA // In tidb's implementation, for PAD SPACE collations, the trailing spaces are removed in the index key. So we are // unable to distinguish 'xxx' from 'xxx ' by a single index range scan, and we may read more data than needed by // the `like` function. Therefore, a Selection is needed to filter the data. - // Since all collations, except for binary, implemented in tidb are PAD SPACE collations, we use a simple + // Since all collations, except for binary, implemented in tidb are PAD SPACE collations for now, we use a simple // collation != binary check here. if collation != charset.CollationBin { likeFuncReserve = true