Skip to content

Commit

Permalink
SPARK-1704: Add CommandStrategy and ExplainCommandPhysical.
Browse files Browse the repository at this point in the history
  • Loading branch information
concretevitamin committed Jun 6, 2014
1 parent 8d21056 commit 408f574
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 1 deletion.
6 changes: 6 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
val sparkContext = self.sparkContext

val strategies: Seq[Strategy] =
CommandStrategy(self) ::
TakeOrdered ::
PartialAggregation ::
HashJoin ::
Expand Down Expand Up @@ -255,6 +256,11 @@ class SQLContext(@transient val sparkContext: SparkContext)
Batch("Prepare Expressions", Once, new BindReferences[SparkPlan]) :: Nil
}

// TODO: or should we make QueryExecution protected[sql]?
protected[sql] def mkQueryExecution(plan: LogicalPlan) = new QueryExecution {
val logical = plan
}

/**
* The primary workflow for executing relational queries using Spark. Designed to allow easy
* access to the intermediate phases of query execution for developers.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,4 +217,15 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
case _ => Nil
}
}

// TODO: this should be merged with SPARK-1508's SetCommandStrategy
case class CommandStrategy(context: SQLContext) extends Strategy {
def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
case logical.ExplainCommand(child) =>
val qe = context.mkQueryExecution(child)
Seq(execution.ExplainCommandPhysical(qe.executedPlan)(context))
case _ => Nil
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, Row}
import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

/**
* :: DeveloperApi ::
*/
//@DeveloperApi
//case class SetCommandPhysical(
// key: Option[String],
// value: Option[String],
// context: SQLContext)
// extends LeafNode {
//
// "hi".split("=", 2)
//
// def execute(): RDD[Row] = (key, value) match {
// case (Some(k), Some(v)) => context.emptyResult
// case (Some(k), None) =>
// val resultString = context.sqlConf.getOption(k) match {
// case Some(v) => s"$k=$v"
// case None => s"$k is undefined"
// }
// context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](resultString))), 1)
// case (None, None) =>
// val pairs = context.sqlConf.getAll
// val rows = pairs.map { case (k, v) =>
// new GenericRow(Array[Any](s"$k=$v"))
// }.toSeq
// // Assume config parameters can fit into one split (machine) ;)
// context.sparkContext.parallelize(rows, 1)
// case _ => context.emptyResult
// }
//
// def output: Seq[Attribute] = Seq.empty // TODO: right thing?
//}


case class ExplainCommandPhysical(child: SparkPlan)
(@transient context: SQLContext) extends UnaryNode {
def execute(): RDD[Row] = {
val lines = child.toString.split("\n").map(s => new GenericRow(Array[Any](s)))
context.sparkContext.parallelize(lines)
}

def output: Seq[Attribute] = child.output // right thing to do?
}
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
val hiveContext = self

override val strategies: Seq[Strategy] = Seq(
CommandStrategy(self),
TakeOrdered,
ParquetOperations,
HiveTableScans,
Expand Down Expand Up @@ -303,7 +304,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
*/
def stringResult(): Seq[String] = analyzed match {
case NativeCommand(cmd) => runSqlHive(cmd)
case ExplainCommand(plan) => new QueryExecution { val logical = plan }.toString.split("\n")
case ExplainCommand(plan) => mkQueryExecution(plan).toString.split("\n")
case query =>
val result: Seq[Seq[Any]] = toRdd.collect().toSeq
// We need the types so we can output struct field names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,5 @@ class HiveQuerySuite extends HiveComparisonTest {
hql("SHOW TABLES").toString
hql("SELECT * FROM src").toString
}

}

0 comments on commit 408f574

Please sign in to comment.