SPARK-1704: Add CommandStrategy and ExplainCommandPhysical.

pdeyhim · Jun 6, 2014 · 408f574 · 408f574
1 parent 8d21056
commit 408f574
Show file tree

Hide file tree

Showing 5 changed files with 88 additions and 1 deletion.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     val sparkContext = self.sparkContext
 
     val strategies: Seq[Strategy] =
+      CommandStrategy(self) ::
       TakeOrdered ::
       PartialAggregation ::
       HashJoin ::
@@ -255,6 +256,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
       Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil
   }
 
+  // TODO: or should we make QueryExecution protected[sql]?
+  protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution {
+    val logical = plan
+  }
+
   /**
    * The primary workflow for executing relational queries using Spark.  Designed to allow easy
    * access to the intermediate phases of query execution for developers.

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -217,4 +217,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       case _ => Nil
     }
   }
+
+  // TODO: this should be merged with SPARK-1508's SetCommandStrategy
+  case class CommandStrategy(context: SQLContext) extends Strategy {
+    def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
+      case logical.ExplainCommand(child) =>
+        val qe = context.mkQueryExecution(child)
+        Seq(execution.ExplainCommandPhysical(qe.executedPlan)(context))
+      case _ => Nil
+    }
+  }
+
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{SQLContext, Row}
+import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+/**
+ * :: DeveloperApi ::
+ */
+//@DeveloperApi
+//case class SetCommandPhysical(
+//    key: Option[String],
+//    value: Option[String],
+//    context: SQLContext)
+//  extends LeafNode {
+//
+//  "hi".split("=", 2)
+//
+//   def execute(): RDD[Row] = (key, value) match {
+//     case (Some(k), Some(v)) => context.emptyResult
+//     case (Some(k), None) =>
+//       val resultString = context.sqlConf.getOption(k) match {
+//         case Some(v) => s"$k=$v"
+//         case None => s"$k is undefined"
+//       }
+//       context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](resultString))), 1)
+//     case (None, None) =>
+//       val pairs = context.sqlConf.getAll
+//       val rows = pairs.map { case (k, v) =>
+//         new GenericRow(Array[Any](s"$k=$v"))
+//       }.toSeq
+//       // Assume config parameters can fit into one split (machine) ;)
+//       context.sparkContext.parallelize(rows, 1)
+//     case _ => context.emptyResult
+//   }
+//
+//   def output: Seq[Attribute] = Seq.empty // TODO: right thing?
+//}
+
+
+case class ExplainCommandPhysical(child: SparkPlan)
+                                 (@transient context: SQLContext) extends UnaryNode {
+  def execute(): RDD[Row] = {
+    val lines = child.toString.split("\n").map(s => new GenericRow(Array[Any](s)))
+    context.sparkContext.parallelize(lines)
+  }
+
+  def output: Seq[Attribute] = child.output // right thing to do?
+}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     val hiveContext = self
 
     override val strategies: Seq[Strategy] = Seq(
+      CommandStrategy(self),
       TakeOrdered,
       ParquetOperations,
       HiveTableScans,
@@ -303,7 +304,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
      */
     def stringResult(): Seq[String] = analyzed match {
       case NativeCommand(cmd) => runSqlHive(cmd)
-      case ExplainCommand(plan) => new QueryExecution { val logical = plan }.toString.split("\n")
+      case ExplainCommand(plan) => mkQueryExecution(plan).toString.split("\n")
       case query =>
         val result: Seq[Seq[Any]] = toRdd.collect().toSeq
         // We need the types so we can output struct field names

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -159,4 +159,5 @@ class HiveQuerySuite extends HiveComparisonTest {
     hql("SHOW TABLES").toString
     hql("SELECT * FROM src").toString
   }
+
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -159,4 +159,5 @@ class HiveQuerySuite extends HiveComparisonTest { @@
         hql("SHOW TABLES").toString
         hql("SELECT * FROM src").toString
       }
     }