Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: fix bug for logical rule outer join elimination #13947

Merged
merged 2 commits into from
Dec 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,39 @@ explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
id count task operator info
IndexReader_11 10000.00 root index:IndexScan_10
└─IndexScan_10 10000.00 cop[tikv] table:t1, index:a, b, range:[NULL,+inf], keep order:false, stats:pseudo
CREATE TABLE `test01` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`stat_date` int(11) NOT NULL DEFAULT '0',
`show_date` varchar(20) NOT NULL DEFAULT '',
`region_id` bigint(20) unsigned NOT NULL DEFAULT '0',
`period` tinyint(3) unsigned NOT NULL DEFAULT '0',
`registration_num` bigint(20) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
CREATE TABLE `test02` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`region_name` varchar(128) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
EXPLAIN SELECT COUNT(1) FROM (SELECT COALESCE(b.region_name, '不详') region_name, SUM(a.registration_num) registration_num FROM (SELECT stat_date, show_date, region_id, 0 registration_num FROM test01 WHERE period = 1 AND stat_date >= 20191202 AND stat_date <= 20191202 UNION ALL SELECT stat_date, show_date, region_id, registration_num registration_num FROM test01 WHERE period = 1 AND stat_date >= 20191202 AND stat_date <= 20191202) a LEFT JOIN test02 b ON a.region_id = b.id WHERE registration_num > 0 AND a.stat_date >= '20191202' AND a.stat_date <= '20191202' GROUP BY a.stat_date , a.show_date , COALESCE(b.region_name, '不详') ) JLS;
id count task operator info
StreamAgg_22 1.00 root funcs:count(1)->Column#22
└─HashAgg_25 1.00 root group by:Column#31, Column#32, Column#33, funcs:firstrow(1)->Column#23
└─Projection_50 0.02 root Column#14, Column#15, coalesce(test.test02.region_name, 不详)->Column#33
└─IndexMergeJoin_32 0.02 root left outer join, inner:TableReader_30, outer key:Column#16, inner key:test.test02.id
├─Union_37 0.02 root
│ ├─Projection_38 0.01 root test.test01.stat_date, test.test01.show_date, test.test01.region_id, cast(0)->Column#17
│ │ └─Selection_39 0.01 root gt(cast(0), 0)
│ │ └─TableReader_42 0.01 root data:Selection_41
│ │ └─Selection_41 0.01 cop[tikv] eq(test.test01.period, 1), ge(test.test01.stat_date, 20191202), ge(test.test01.stat_date, 20191202), le(test.test01.stat_date, 20191202), le(test.test01.stat_date, 20191202)
│ │ └─TableScan_40 10000.00 cop[tikv] table:test01, range:[-inf,+inf], keep order:false, stats:pseudo
│ └─Projection_43 0.01 root test.test01.stat_date, test.test01.show_date, test.test01.region_id, cast(test.test01.registration_num)->Column#17
│ └─Selection_44 0.01 root gt(cast(test.test01.registration_num), 0)
│ └─TableReader_47 0.01 root data:Selection_46
│ └─Selection_46 0.01 cop[tikv] eq(test.test01.period, 1), ge(test.test01.stat_date, 20191202), ge(test.test01.stat_date, 20191202), le(test.test01.stat_date, 20191202), le(test.test01.stat_date, 20191202)
│ └─TableScan_45 10000.00 cop[tikv] table:test01, range:[-inf,+inf], keep order:false, stats:pseudo
└─TableReader_30 1.00 root data:TableScan_29
└─TableScan_29 1.00 cop[tikv] table:b, range: decided by [Column#16], keep order:true, stats:pseudo
drop table if exists t;
create table t(a int, nb int not null, nc int not null);
explain select ifnull(a, 0) from t;
Expand Down
18 changes: 18 additions & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,24 @@ create table t2(a int, b int, c int, primary key(a));
explain select t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;
explain select distinct t1.a, t1.b from t1 left outer join t2 on t1.a = t2.a;

CREATE TABLE `test01` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`stat_date` int(11) NOT NULL DEFAULT '0',
`show_date` varchar(20) NOT NULL DEFAULT '',
`region_id` bigint(20) unsigned NOT NULL DEFAULT '0',
`period` tinyint(3) unsigned NOT NULL DEFAULT '0',
`registration_num` bigint(20) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

CREATE TABLE `test02` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`region_name` varchar(128) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;

EXPLAIN SELECT COUNT(1) FROM (SELECT COALESCE(b.region_name, '不详') region_name, SUM(a.registration_num) registration_num FROM (SELECT stat_date, show_date, region_id, 0 registration_num FROM test01 WHERE period = 1 AND stat_date >= 20191202 AND stat_date <= 20191202 UNION ALL SELECT stat_date, show_date, region_id, registration_num registration_num FROM test01 WHERE period = 1 AND stat_date >= 20191202 AND stat_date <= 20191202) a LEFT JOIN test02 b ON a.region_id = b.id WHERE registration_num > 0 AND a.stat_date >= '20191202' AND a.stat_date <= '20191202' GROUP BY a.stat_date , a.show_date , COALESCE(b.region_name, '不详') ) JLS;

# https://github.com/pingcap/tidb/issues/7918
drop table if exists t;
create table t(a int, nb int not null, nc int not null);
Expand Down
5 changes: 4 additions & 1 deletion planner/core/rule_join_elimination.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,10 @@ func (o *outerJoinEliminator) doOptimize(p LogicalPlan, aggCols []*expression.Co
parentCols = append(parentCols, expression.ExtractColumns(expr)...)
}
case *LogicalAggregation:
parentCols = append(parentCols[:0], x.groupByCols...)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A question won't block merging this pr: could we just remove the groupByCols from the Aggregation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can't. groupByCols are used to check whether aggregation can keep order when generating stream agg.

parentCols = parentCols[:0]
for _, groupByItem := range x.GroupByItems {
parentCols = append(parentCols, expression.ExtractColumns(groupByItem)...)
}
for _, aggDesc := range x.AggFuncs {
for _, expr := range aggDesc.Args {
parentCols = append(parentCols, expression.ExtractColumns(expr)...)
Expand Down