From 21db91b9dab966ebde3c80e3fa1a9a032c56fcf6 Mon Sep 17 00:00:00 2001 From: Arenatlx <314806019@qq.com> Date: Mon, 20 Nov 2023 08:55:10 +0800 Subject: [PATCH] planner: fix issue 48643 that aggDesc modification will change the referrence (#48662) close pingcap/tidb#48643 --- .../testdata/enforce_mpp_suite_out.json | 101 +++++++++--------- planner/core/exhaust_physical_plans.go | 2 + planner/core/integration_test.go | 20 ++++ planner/core/physical_plans.go | 16 ++- 4 files changed, 83 insertions(+), 56 deletions(-) diff --git a/planner/core/casetest/testdata/enforce_mpp_suite_out.json b/planner/core/casetest/testdata/enforce_mpp_suite_out.json index 74a9ffbdf6685..7b069227ae1df 100644 --- a/planner/core/casetest/testdata/enforce_mpp_suite_out.json +++ b/planner/core/casetest/testdata/enforce_mpp_suite_out.json @@ -669,52 +669,48 @@ { "SQL": "EXPLAIN select o.o_id, count(*) from c, o where c.c_id=o.c_id group by o.o_id; -- 2. test agg push down, group by non-join column", "Plan": [ - "TableReader_84 8000.00 root MppVersion: 1, data:ExchangeSender_83", - "└─ExchangeSender_83 8000.00 mpp[tiflash] ExchangeType: PassThrough", + "TableReader_78 8000.00 root MppVersion: 1, data:ExchangeSender_77", + "└─ExchangeSender_77 8000.00 mpp[tiflash] ExchangeType: PassThrough", " └─Projection_10 8000.00 mpp[tiflash] test.o.o_id, Column#6", - " └─Projection_79 8000.00 mpp[tiflash] Column#6, test.o.o_id", - " └─HashAgg_80 8000.00 mpp[tiflash] group by:test.o.o_id, funcs:sum(Column#25)->Column#6, funcs:firstrow(Column#26)->test.o.o_id", - " └─ExchangeReceiver_82 8000.00 mpp[tiflash] ", - " └─ExchangeSender_81 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.o_id, collate: binary]", - " └─HashAgg_76 8000.00 mpp[tiflash] group by:Column#29, funcs:sum(Column#27)->Column#25, funcs:firstrow(Column#28)->Column#26", - " └─Projection_85 9990.00 mpp[tiflash] cast(Column#7, decimal(20,0) BINARY)->Column#27, Column#8, test.o.o_id", - " └─HashJoin_78 9990.00 mpp[tiflash] inner join, equal:[eq(test.c.c_id, test.o.c_id)]", - " ├─ExchangeReceiver_34(Build) 8000.00 mpp[tiflash] ", - " │ └─ExchangeSender_33 8000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST", - " │ └─Projection_29 8000.00 mpp[tiflash] Column#7, Column#8, test.o.o_id, test.o.c_id", - " │ └─HashAgg_30 8000.00 mpp[tiflash] group by:test.o.c_id, test.o.o_id, funcs:sum(Column#9)->Column#7, funcs:firstrow(test.o.o_id)->Column#8, funcs:firstrow(test.o.o_id)->test.o.o_id, funcs:firstrow(test.o.c_id)->test.o.c_id", - " │ └─ExchangeReceiver_32 8000.00 mpp[tiflash] ", - " │ └─ExchangeSender_31 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.o_id, collate: binary], [name: test.o.c_id, collate: binary]", - " │ └─HashAgg_21 8000.00 mpp[tiflash] group by:test.o.c_id, test.o.o_id, funcs:count(1)->Column#9", - " │ └─TableFullScan_28 10000.00 mpp[tiflash] table:o keep order:false, stats:pseudo", - " └─Selection_19(Probe) 9990.00 mpp[tiflash] not(isnull(test.c.c_id))", - " └─TableFullScan_18 10000.00 mpp[tiflash] table:c pushed down filter:empty, keep order:false, stats:pseudo" + " └─Projection_76 8000.00 mpp[tiflash] Column#6, test.o.o_id", + " └─HashAgg_75 8000.00 mpp[tiflash] group by:test.o.o_id, funcs:sum(Column#7)->Column#6, funcs:firstrow(Column#8)->test.o.o_id", + " └─ExchangeReceiver_71 9990.00 mpp[tiflash] ", + " └─ExchangeSender_70 9990.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.o_id, collate: binary]", + " └─HashJoin_69 9990.00 mpp[tiflash] inner join, equal:[eq(test.c.c_id, test.o.c_id)]", + " ├─ExchangeReceiver_34(Build) 8000.00 mpp[tiflash] ", + " │ └─ExchangeSender_33 8000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST", + " │ └─Projection_29 8000.00 mpp[tiflash] Column#7, Column#8, test.o.o_id, test.o.c_id", + " │ └─HashAgg_30 8000.00 mpp[tiflash] group by:test.o.c_id, test.o.o_id, funcs:sum(Column#9)->Column#7, funcs:firstrow(test.o.o_id)->Column#8, funcs:firstrow(test.o.o_id)->test.o.o_id, funcs:firstrow(test.o.c_id)->test.o.c_id", + " │ └─ExchangeReceiver_32 8000.00 mpp[tiflash] ", + " │ └─ExchangeSender_31 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.o_id, collate: binary], [name: test.o.c_id, collate: binary]", + " │ └─HashAgg_21 8000.00 mpp[tiflash] group by:test.o.c_id, test.o.o_id, funcs:count(1)->Column#9", + " │ └─TableFullScan_28 10000.00 mpp[tiflash] table:o keep order:false, stats:pseudo", + " └─Selection_19(Probe) 9990.00 mpp[tiflash] not(isnull(test.c.c_id))", + " └─TableFullScan_18 10000.00 mpp[tiflash] table:c pushed down filter:empty, keep order:false, stats:pseudo" ], "Warn": null }, { "SQL": "EXPLAIN select o.c_id, count(*) from c, o where c.c_id=o.c_id group by o.c_id; -- 3. test agg push down, group by join column", "Plan": [ - "TableReader_84 8000.00 root MppVersion: 1, data:ExchangeSender_83", - "└─ExchangeSender_83 8000.00 mpp[tiflash] ExchangeType: PassThrough", + "TableReader_78 8000.00 root MppVersion: 1, data:ExchangeSender_77", + "└─ExchangeSender_77 8000.00 mpp[tiflash] ExchangeType: PassThrough", " └─Projection_10 8000.00 mpp[tiflash] test.o.c_id, Column#6", - " └─Projection_79 8000.00 mpp[tiflash] Column#6, test.o.c_id", - " └─HashAgg_80 8000.00 mpp[tiflash] group by:test.o.c_id, funcs:sum(Column#21)->Column#6, funcs:firstrow(Column#22)->test.o.c_id", - " └─ExchangeReceiver_82 8000.00 mpp[tiflash] ", - " └─ExchangeSender_81 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.c_id, collate: binary]", - " └─HashAgg_76 8000.00 mpp[tiflash] group by:Column#25, funcs:sum(Column#23)->Column#21, funcs:firstrow(Column#24)->Column#22", - " └─Projection_85 9990.00 mpp[tiflash] cast(Column#7, decimal(20,0) BINARY)->Column#23, Column#8, test.o.c_id", - " └─HashJoin_78 9990.00 mpp[tiflash] inner join, equal:[eq(test.c.c_id, test.o.c_id)]", - " ├─ExchangeReceiver_34(Build) 8000.00 mpp[tiflash] ", - " │ └─ExchangeSender_33 8000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST", - " │ └─Projection_29 8000.00 mpp[tiflash] Column#7, Column#8, test.o.c_id", - " │ └─HashAgg_30 8000.00 mpp[tiflash] group by:test.o.c_id, funcs:sum(Column#9)->Column#7, funcs:firstrow(test.o.c_id)->Column#8, funcs:firstrow(test.o.c_id)->test.o.c_id", - " │ └─ExchangeReceiver_32 8000.00 mpp[tiflash] ", - " │ └─ExchangeSender_31 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.c_id, collate: binary]", - " │ └─HashAgg_21 8000.00 mpp[tiflash] group by:test.o.c_id, funcs:count(1)->Column#9", - " │ └─TableFullScan_28 10000.00 mpp[tiflash] table:o keep order:false, stats:pseudo", - " └─Selection_19(Probe) 9990.00 mpp[tiflash] not(isnull(test.c.c_id))", - " └─TableFullScan_18 10000.00 mpp[tiflash] table:c pushed down filter:empty, keep order:false, stats:pseudo" + " └─Projection_76 8000.00 mpp[tiflash] Column#6, test.o.c_id", + " └─HashAgg_75 8000.00 mpp[tiflash] group by:test.o.c_id, funcs:sum(Column#7)->Column#6, funcs:firstrow(Column#8)->test.o.c_id", + " └─ExchangeReceiver_71 9990.00 mpp[tiflash] ", + " └─ExchangeSender_70 9990.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.c_id, collate: binary]", + " └─HashJoin_69 9990.00 mpp[tiflash] inner join, equal:[eq(test.c.c_id, test.o.c_id)]", + " ├─ExchangeReceiver_34(Build) 8000.00 mpp[tiflash] ", + " │ └─ExchangeSender_33 8000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST", + " │ └─Projection_29 8000.00 mpp[tiflash] Column#7, Column#8, test.o.c_id", + " │ └─HashAgg_30 8000.00 mpp[tiflash] group by:test.o.c_id, funcs:sum(Column#9)->Column#7, funcs:firstrow(test.o.c_id)->Column#8, funcs:firstrow(test.o.c_id)->test.o.c_id", + " │ └─ExchangeReceiver_32 8000.00 mpp[tiflash] ", + " │ └─ExchangeSender_31 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.o.c_id, collate: binary]", + " │ └─HashAgg_21 8000.00 mpp[tiflash] group by:test.o.c_id, funcs:count(1)->Column#9", + " │ └─TableFullScan_28 10000.00 mpp[tiflash] table:o keep order:false, stats:pseudo", + " └─Selection_19(Probe) 9990.00 mpp[tiflash] not(isnull(test.c.c_id))", + " └─TableFullScan_18 10000.00 mpp[tiflash] table:c pushed down filter:empty, keep order:false, stats:pseudo" ], "Warn": null }, @@ -727,21 +723,20 @@ " └─ExchangeSender 10.00 mpp[tiflash] ExchangeType: PassThrough", " └─TopN 10.00 mpp[tiflash] Column#7, offset:0, count:10", " └─Projection 16000.00 mpp[tiflash] Column#9, Column#7", - " └─HashAgg 16000.00 mpp[tiflash] group by:Column#40, funcs:sum(Column#38)->Column#9, funcs:firstrow(Column#39)->Column#7", - " └─Projection 16000.00 mpp[tiflash] cast(Column#10, decimal(20,0) BINARY)->Column#38, Column#11, Column#7", - " └─ExchangeReceiver 16000.00 mpp[tiflash] ", - " └─ExchangeSender 16000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: Column#7, collate: binary]", - " └─Union 16000.00 mpp[tiflash] ", - " ├─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:sum(Column#30)->Column#10, funcs:firstrow(test.t.a)->Column#11, funcs:firstrow(test.t.a)->Column#7", - " │ └─ExchangeReceiver 8000.00 mpp[tiflash] ", - " │ └─ExchangeSender 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.t.a, collate: binary]", - " │ └─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:count(1)->Column#30", - " │ └─TableFullScan 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo", - " └─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:sum(Column#33)->Column#10, funcs:firstrow(test.t.a)->Column#11, funcs:firstrow(test.t.a)->Column#7", - " └─ExchangeReceiver 8000.00 mpp[tiflash] ", - " └─ExchangeSender 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.t.a, collate: binary]", - " └─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:count(1)->Column#33", - " └─TableFullScan 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo" + " └─HashAgg 16000.00 mpp[tiflash] group by:Column#7, funcs:sum(Column#10)->Column#9, funcs:firstrow(Column#11)->Column#7", + " └─ExchangeReceiver 16000.00 mpp[tiflash] ", + " └─ExchangeSender 16000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: Column#7, collate: binary]", + " └─Union 16000.00 mpp[tiflash] ", + " ├─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:sum(Column#30)->Column#10, funcs:firstrow(test.t.a)->Column#11, funcs:firstrow(test.t.a)->Column#7", + " │ └─ExchangeReceiver 8000.00 mpp[tiflash] ", + " │ └─ExchangeSender 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.t.a, collate: binary]", + " │ └─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:count(1)->Column#30", + " │ └─TableFullScan 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo", + " └─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:sum(Column#33)->Column#10, funcs:firstrow(test.t.a)->Column#11, funcs:firstrow(test.t.a)->Column#7", + " └─ExchangeReceiver 8000.00 mpp[tiflash] ", + " └─ExchangeSender 8000.00 mpp[tiflash] ExchangeType: HashPartition, Compression: FAST, Hash Cols: [name: test.t.a, collate: binary]", + " └─HashAgg 8000.00 mpp[tiflash] group by:test.t.a, funcs:count(1)->Column#33", + " └─TableFullScan 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo" ], "Warn": null } diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 376e9e0afd6bc..d59ada0a22b3c 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -3157,7 +3157,9 @@ func (la *LogicalAggregation) tryToGetMppHashAggs(prop *property.PhysicalPropert finalAggAdjust := func(aggFuncs []*aggregation.AggFuncDesc) { for i, agg := range aggFuncs { if agg.Mode == aggregation.FinalMode && agg.Name == ast.AggFuncCount { + oldFt := agg.RetTp aggFuncs[i], _ = aggregation.NewAggFuncDesc(la.SCtx(), ast.AggFuncSum, agg.Args, false) + aggFuncs[i].RetTp = oldFt } } } diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go index 7bf64614c4a2b..36d7ec6728be4 100644 --- a/planner/core/integration_test.go +++ b/planner/core/integration_test.go @@ -5331,6 +5331,26 @@ func TestIssue43116(t *testing.T) { tk.MustQuery("show warnings").Check(testkit.Rows("Warning 1105 Memory capacity of 111 bytes for 'tidb_opt_range_max_size' exceeded when building ranges. Less accurate ranges such as full range are chosen")) } +func TestIssue48643(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t, tp") + tk.MustExec("create table t\n(pk1 varchar(128),\n pk2 varchar(128),\n pk3 varchar(128),\n data varchar(128),\n primary key (pk1, pk2, pk3))") + tk.MustExec("insert into t values (UUID(), UUID(), uuid(), uuid()), (uuid(), uuid(), uuid(), uuid())") + tk.MustExec("insert into t select uuid(), uuid(), uuid(), uuid() from t, t t2, t t3, t t4") + tk.MustExec("insert into t select t.pk1, uuid(), uuid(), uuid() from t, t t2, t t3, t t4") + tk.MustQuery("select count(*) from t;").Check(testkit.Rows("104994")) + tk.MustQuery("select count(distinct pk1) from t;").Check(testkit.Rows("18")) + res1 := tk.MustQuery("select pk1, count(*) from t group by pk1 order by count(*), pk1 limit 10;").Sort() + + tk.MustExec("create table tp\n(pk1 varchar(128),\n pk2 varchar(128),\n pk3 varchar(128),\n data varchar(128),\n primary key (pk1, pk2, pk3))\npartition by key(pk1) partitions 128;") + tk.MustExec("insert into tp select * from t;") + tk.MustQuery("select count(*) from tp;").Check(testkit.Rows("104994")) + tk.MustQuery("select count(distinct pk1) from tp;").Check(testkit.Rows("18")) + tk.MustQuery("select pk1, count(*) from tp group by pk1 order by count(*), pk1 limit 10;").Sort().Check(res1.Rows()) +} + func TestIssue45033(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/planner/core/physical_plans.go b/planner/core/physical_plans.go index 41b30fb3cb1de..76f2527fafd2d 100644 --- a/planner/core/physical_plans.go +++ b/planner/core/physical_plans.go @@ -1890,10 +1890,20 @@ func (p *PhysicalHashAgg) MemoryUsage() (sum int64) { // NewPhysicalHashAgg creates a new PhysicalHashAgg from a LogicalAggregation. func NewPhysicalHashAgg(la *LogicalAggregation, newStats *property.StatsInfo, prop *property.PhysicalProperty) *PhysicalHashAgg { + newGbyItems := make([]expression.Expression, len(la.GroupByItems)) + copy(newGbyItems, la.GroupByItems) + newAggFuncs := make([]*aggregation.AggFuncDesc, len(la.AggFuncs)) + // There's some places that rewrites the aggFunc in-place. + // I clone it first. + // It needs a well refactor to make sure that the physical optimize should not change the things of logical plan. + // It's bad for cascades + for i, aggFunc := range la.AggFuncs { + newAggFuncs[i] = aggFunc.Clone() + } agg := basePhysicalAgg{ - GroupByItems: la.GroupByItems, - AggFuncs: la.AggFuncs, - }.initForHash(la.ctx, newStats, la.blockOffset, prop) + GroupByItems: newGbyItems, + AggFuncs: newAggFuncs, + }.initForHash(la.SCtx(), newStats, la.SelectBlockOffset(), prop) return agg }