From 408f574fd17730362dac0a6b966d4470df71cc1c Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng.y@gmail.com>
Date: Fri, 6 Jun 2014 13:51:06 -0700
Subject: [PATCH 1/6] SPARK-1704: Add CommandStrategy and
 ExplainCommandPhysical.

---
 .../org/apache/spark/sql/SQLContext.scala     |  6 ++
 .../spark/sql/execution/SparkStrategies.scala | 11 +++
 .../apache/spark/sql/execution/commands.scala | 68 +++++++++++++++++++
 .../apache/spark/sql/hive/HiveContext.scala   |  3 +-
 .../sql/hive/execution/HiveQuerySuite.scala   |  1 +
 5 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 043be58edc91b..a2d095debfc87 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     val sparkContext = self.sparkContext
 
     val strategies: Seq[Strategy] =
+      CommandStrategy(self) ::
       TakeOrdered ::
       PartialAggregation ::
       HashJoin ::
@@ -255,6 +256,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
       Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil
   }
 
+  // TODO: or should we make QueryExecution protected[sql]?
+  protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution {
+    val logical = plan
+  }
+
   /**
    * The primary workflow for executing relational queries using Spark.  Designed to allow easy
    * access to the intermediate phases of query execution for developers.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index cfa8bdae58b11..7a7ce66fd82d4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -217,4 +217,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       case _ => Nil
     }
   }
+
+  // TODO: this should be merged with SPARK-1508's SetCommandStrategy
+  case class CommandStrategy(context: SQLContext) extends Strategy {
+    def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
+      case logical.ExplainCommand(child) =>
+        val qe = context.mkQueryExecution(child)
+        Seq(execution.ExplainCommandPhysical(qe.executedPlan)(context))
+      case _ => Nil
+    }
+  }
+
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
new file mode 100644
index 0000000000000..85e0fa9594e03
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{SQLContext, Row}
+import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * :: DeveloperApi ::
+ */
+//@DeveloperApi
+//case class SetCommandPhysical(
+//    key: Option[String],
+//    value: Option[String],
+//    context: SQLContext)
+//  extends LeafNode {
+//
+//  "hi".split("=", 2)
+//
+//   def execute(): RDD[Row] = (key, value) match {
+//     case (Some(k), Some(v)) => context.emptyResult
+//     case (Some(k), None) =>
+//       val resultString = context.sqlConf.getOption(k) match {
+//         case Some(v) => s"$k=$v"
+//         case None => s"$k is undefined"
+//       }
+//       context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](resultString))), 1)
+//     case (None, None) =>
+//       val pairs = context.sqlConf.getAll
+//       val rows = pairs.map { case (k, v) =>
+//         new GenericRow(Array[Any](s"$k=$v"))
+//       }.toSeq
+//       // Assume config parameters can fit into one split (machine) ;)
+//       context.sparkContext.parallelize(rows, 1)
+//     case _ => context.emptyResult
+//   }
+//
+//   def output: Seq[Attribute] = Seq.empty // TODO: right thing?
+//}
+
+
+case class ExplainCommandPhysical(child: SparkPlan)
+                                 (@transient context: SQLContext) extends UnaryNode {
+  def execute(): RDD[Row] = {
+    val lines = child.toString.split("\n").map(s => new GenericRow(Array[Any](s)))
+    context.sparkContext.parallelize(lines)
+  }
+
+  def output: Seq[Attribute] = child.output // right thing to do?
+}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index b21f24dad785d..b6648ab55d761 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     val hiveContext = self
 
     override val strategies: Seq[Strategy] = Seq(
+      CommandStrategy(self),
       TakeOrdered,
       ParquetOperations,
       HiveTableScans,
@@ -303,7 +304,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
      */
     def stringResult(): Seq[String] = analyzed match {
       case NativeCommand(cmd) => runSqlHive(cmd)
-      case ExplainCommand(plan) => new QueryExecution { val logical = plan }.toString.split("\n")
+      case ExplainCommand(plan) => mkQueryExecution(plan).toString.split("\n")
       case query =>
         val result: Seq[Seq[Any]] = toRdd.collect().toSeq
         // We need the types so we can output struct field names
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 125cc18bfb2b5..c378b9da5fc9d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -159,4 +159,5 @@ class HiveQuerySuite extends HiveComparisonTest {
     hql("SHOW TABLES").toString
     hql("SELECT * FROM src").toString
   }
+
 }

From 439c6abb689b3859a63ec2b850586b362dc3a3a1 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng.y@gmail.com>
Date: Fri, 6 Jun 2014 17:02:45 -0700
Subject: [PATCH 2/6] Minor cleanups.

---
 .../apache/spark/sql/execution/commands.scala | 36 -------------------
 1 file changed, 36 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index 85e0fa9594e03..575778f4ecda4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -17,45 +17,9 @@
 
 package org.apache.spark.sql.execution
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{SQLContext, Row}
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-
-/**
- * :: DeveloperApi ::
- */
-//@DeveloperApi
-//case class SetCommandPhysical(
-//    key: Option[String],
-//    value: Option[String],
-//    context: SQLContext)
-//  extends LeafNode {
-//
-//  "hi".split("=", 2)
-//
-//   def execute(): RDD[Row] = (key, value) match {
-//     case (Some(k), Some(v)) => context.emptyResult
-//     case (Some(k), None) =>
-//       val resultString = context.sqlConf.getOption(k) match {
-//         case Some(v) => s"$k=$v"
-//         case None => s"$k is undefined"
-//       }
-//       context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](resultString))), 1)
-//     case (None, None) =>
-//       val pairs = context.sqlConf.getAll
-//       val rows = pairs.map { case (k, v) =>
-//         new GenericRow(Array[Any](s"$k=$v"))
-//       }.toSeq
-//       // Assume config parameters can fit into one split (machine) ;)
-//       context.sparkContext.parallelize(rows, 1)
-//     case _ => context.emptyResult
-//   }
-//
-//   def output: Seq[Attribute] = Seq.empty // TODO: right thing?
-//}
-
 
 case class ExplainCommandPhysical(child: SparkPlan)
                                  (@transient context: SQLContext) extends UnaryNode {

From 4318fd7ed84f3f08f3e74d0b70befc01c3d77971 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng.y@gmail.com>
Date: Fri, 6 Jun 2014 23:46:21 -0700
Subject: [PATCH 3/6] Make all output one Row.

---
 .../main/scala/org/apache/spark/sql/execution/commands.scala  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index 575778f4ecda4..2441bc5f6c876 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -24,8 +24,8 @@ import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
 case class ExplainCommandPhysical(child: SparkPlan)
                                  (@transient context: SQLContext) extends UnaryNode {
   def execute(): RDD[Row] = {
-    val lines = child.toString.split("\n").map(s => new GenericRow(Array[Any](s)))
-    context.sparkContext.parallelize(lines)
+    val planString = new GenericRow(Array[Any](child.toString))
+    context.sparkContext.parallelize(Seq(planString))
   }
 
   def output: Seq[Attribute] = child.output // right thing to do?

From 719ada9e90787cc4fe9e2293f2a056b393348e08 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng.y@gmail.com>
Date: Sat, 7 Jun 2014 10:38:40 -0700
Subject: [PATCH 4/6] Override otherCopyArgs for ExplainCommandPhysical.

---
 .../main/scala/org/apache/spark/sql/execution/commands.scala    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index 2441bc5f6c876..5afa5e43a31ed 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -29,4 +29,6 @@ case class ExplainCommandPhysical(child: SparkPlan)
   }
 
   def output: Seq[Attribute] = child.output // right thing to do?
+
+  override def otherCopyArgs = context :: Nil
 }

From 1bfa379024e518fc7dc99eaa10894fd48740bf45 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng.y@gmail.com>
Date: Sat, 7 Jun 2014 11:35:08 -0700
Subject: [PATCH 5/6] Modify output().

---
 .../spark/sql/catalyst/plans/logical/LogicalPlan.scala    | 8 +++++---
 .../org/apache/spark/sql/execution/SparkStrategies.scala  | 2 +-
 .../scala/org/apache/spark/sql/execution/commands.scala   | 4 +---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 2b8fbdcde9d37..4f641cd3a656b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.QueryPlan
-import org.apache.spark.sql.catalyst.types.StructType
+import org.apache.spark.sql.catalyst.types.{StringType, StructType}
 import org.apache.spark.sql.catalyst.trees
 
 abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
@@ -102,7 +102,7 @@ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] {
  */
 abstract class Command extends LeafNode {
   self: Product =>
-  def output = Seq.empty
+  def output: Seq[Attribute] = Seq.empty
 }
 
 /**
@@ -115,7 +115,9 @@ case class NativeCommand(cmd: String) extends Command
  * Returned by a parser when the users only wants to see what query plan would be executed, without
  * actually performing the execution.
  */
-case class ExplainCommand(plan: LogicalPlan) extends Command
+case class ExplainCommand(plan: LogicalPlan) extends Command {
+  override def output = Seq(AttributeReference("plan", StringType, nullable = false)())
+}
 
 /**
  * A logical plan node with single child.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index 7a7ce66fd82d4..a516cf478df85 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -223,7 +223,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.ExplainCommand(child) =>
         val qe = context.mkQueryExecution(child)
-        Seq(execution.ExplainCommandPhysical(qe.executedPlan)(context))
+        Seq(execution.ExplainCommandPhysical(qe.executedPlan, plan.output)(context))
       case _ => Nil
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index 5afa5e43a31ed..5371d2f479e73 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -21,14 +21,12 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{SQLContext, Row}
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
 
-case class ExplainCommandPhysical(child: SparkPlan)
+case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
                                  (@transient context: SQLContext) extends UnaryNode {
   def execute(): RDD[Row] = {
     val planString = new GenericRow(Array[Any](child.toString))
     context.sparkContext.parallelize(Seq(planString))
   }
 
-  def output: Seq[Attribute] = child.output // right thing to do?
-
   override def otherCopyArgs = context :: Nil
 }

From 5b7911f6d8564ace5bd88409a2e01a8bbfa355f7 Mon Sep 17 00:00:00 2001
From: Zongheng Yang <zongheng.y@gmail.com>
Date: Mon, 9 Jun 2014 12:06:46 -0700
Subject: [PATCH 6/6] Add a regression test.

---
 .../spark/sql/hive/execution/HiveQuerySuite.scala     | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index c378b9da5fc9d..c56eee258047f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.hive.test.TestHive
 
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -160,4 +161,14 @@ class HiveQuerySuite extends HiveComparisonTest {
     hql("SELECT * FROM src").toString
   }
 
+  test("SPARK-1704: Explain commands as a SchemaRDD") {
+    hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+    val rdd = hql("explain select key, count(value) from src group by key")
+    assert(rdd.collect().size == 1)
+    assert(rdd.toString.contains("ExplainCommand"))
+    assert(rdd.filter(row => row.toString.contains("ExplainCommand")).collect().size == 0,
+      "actual contents of the result should be the plans of the query to be explained")
+    TestHive.reset()
+  }
+
 }