From 96fce3a911bb7d026898de9dfbb379396699ac70 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Fri, 5 Jan 2024 10:20:33 +0300 Subject: [PATCH] Add test case for unnecessary hash when target is 1 --- .../enforce_distribution.rs | 3 +- datafusion/sqllogictest/test_files/join.slt | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index bf5aa7d02272..1c86c4c3205a 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -927,9 +927,8 @@ fn add_hash_on_top( n_target: usize, repartition_beneficial_stats: bool, ) -> Result { - let partition_count = input.plan.output_partitioning().partition_count(); // Early return if hash repartition is unnecessary - if n_target == partition_count && n_target == 1 { + if n_target == 1 { return Ok(input); } diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt index c9dd7ca604ad..ca9b918ff3ee 100644 --- a/datafusion/sqllogictest/test_files/join.slt +++ b/datafusion/sqllogictest/test_files/join.slt @@ -626,6 +626,38 @@ Alice 100 Alice 1 Alice 50 Alice 2 Alice 100 Alice 2 +statement ok +set datafusion.execution.target_partitions = 1; + +statement ok +set datafusion.optimizer.repartition_joins = true; + +# make sure when target partition is 1, hash repartition is not added +# to the final plan. 
+query TT +EXPLAIN SELECT * +FROM t1, +t1 as t2 +WHERE t1.a=t2.a; +---- +logical_plan +Inner Join: t1.a = t2.a +--TableScan: t1 projection=[a, b] +--SubqueryAlias: t2 +----TableScan: t1 projection=[a, b] +physical_plan +CoalesceBatchesExec: target_batch_size=8192 +--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)] +----MemoryExec: partitions=1, partition_sizes=[1] +----MemoryExec: partitions=1, partition_sizes=[1] + +# Reset the configs to their previous values +statement ok +set datafusion.execution.target_partitions = 4; + +statement ok +set datafusion.optimizer.repartition_joins = false; + statement ok DROP TABLE t1;