apache · mcdull-zhang · Nov 21, 2023 · Nov 22, 2023 · JoshRosen · Nov 22, 2023
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
@@ -138,9 +138,8 @@ trait HashJoin extends JoinCodegenSupport {
     UnsafeProjection.create(streamedBoundKeys)
 
   @transient protected[this] lazy val boundCondition = if (condition.isDefined) {
-    if (joinType == FullOuter && buildSide == BuildLeft) {
-      // Put join left side before right side. This is to be consistent with
-      // `ShuffledHashJoinExec.fullOuterJoin`.
+    if ((joinType == FullOuter || joinType == LeftOuter) && buildSide == BuildLeft) {
+      // Put join left side before right side. This is to be consistent with ShuffledHashJoinExec.
 // Splits the two join children into (build side, streamed side) as dictated by `buildSide`:
 // `BuildLeft` builds from `left` and streams `right`; `BuildRight` is the mirror image.
 protected lazy val (buildPlan, streamedPlan) = buildSide match {
   case BuildRight => (right, left)
   case BuildLeft => (left, right)
 }
 // Join condition bound against the left child's output followed by the right child's output.
 // When no condition is defined, every row is accepted.
 @transient private[this] lazy val boundCondition = condition match {
   case Some(expr) => newPredicate(expr, left.output ++ right.output)
   case None => (_: InternalRow) => true
 }
 // Splits the two join children into (build side, streamed side) as dictated by `buildSide`:
 // `BuildLeft` builds from `left` and streams `right`; `BuildRight` is the mirror image.
 protected lazy val (buildPlan, streamedPlan) = buildSide match {
   case BuildRight => (right, left)
   case BuildLeft => (left, right)
 }
 // Join condition bound against the left child's output followed by the right child's output.
 // When no condition is defined, every row is accepted.
 @transient private[this] lazy val boundCondition = condition match {
   case Some(expr) => newPredicate(expr, left.output ++ right.output)
   case None => (_: InternalRow) => true
 }
       Predicate.create(condition.get, buildPlan.output ++ streamedPlan.output).eval _
     } else {
       Predicate.create(condition.get, streamedPlan.output ++ buildPlan.output).eval _

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -1755,4 +1755,27 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan
       cached.unpersist()
     }
   }
+
+  test("SPARK-46037: When Left Join build Left, ShuffledHashJoinExec may " +
+    "result in incorrect results") {
+    withSQLConf(SQLConf.ENABLE_BUILD_SIDE_OUTER_SHUFFLED_HASH_JOIN_CODEGEN.key -> "false") {
+      val df1 = sql( // LEFT OUTER JOIN hinted with SHUFFLE_HASH(t1)
+        """
+          |SELECT /*+ SHUFFLE_HASH(t1) */ *
+          |FROM testData t1
+          |LEFT OUTER JOIN
+          |testData2 t2
+          |ON key = a AND concat(value, b) = '12'
+          |""".stripMargin)
+      val df2 = sql( // identical query, hinted with SHUFFLE_MERGE(t1) instead
+        """
+          |SELECT /*+ SHUFFLE_MERGE(t1) */ *
+          |FROM testData t1
+          |LEFT OUTER JOIN
+          |testData2 t2
+          |ON key = a AND concat(value, b) = '12'
+          |""".stripMargin)
+      checkAnswer(df1, df2.collect()) // df1 is checked against df2's collected rows
+    }
+  }
 }