Skip to content

Commit

Permalink
Simple framework for debugging query execution
Browse files Browse the repository at this point in the history
  • Loading branch information
marmbrus committed Jun 7, 2014
1 parent 41c4a33 commit c9dded2
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 50 deletions.
5 changes: 0 additions & 5 deletions sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
|== Physical Plan ==
|${stringOrError(executedPlan)}
""".stripMargin.trim

/**
* Runs the query after interposing operators that print the result of each intermediate step.
*/
def debugExec() = DebugQuery(executedPlan).execute().collect()
}

/**
Expand Down
45 changes: 0 additions & 45 deletions sql/core/src/main/scala/org/apache/spark/sql/execution/debug.scala

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution

import scala.collection.mutable.HashSet

import org.apache.spark.{AccumulatorParam, Accumulator, SparkContext}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.SparkContext._
import org.apache.spark.sql.{SchemaRDD, Row}

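/**
 * :: DeveloperApi ::
 * Contains methods for debugging query execution.
 *
 * Usage (an implicit SparkContext must be in scope, since `debug` takes one implicitly):
 * {{{
 *   import org.apache.spark.sql.execution.debug._
 *   sql("SELECT key FROM src").debug
 * }}}
 */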
package object debug {

/**
 * :: DeveloperApi ::
 * Enriches a SchemaRDD with a `debug` method that runs the query with instrumentation.
 */
@DeveloperApi
implicit class DebugQuery(query: SchemaRDD) {
  /**
   * Wraps each operator of the executed plan in a [[DebugNode]], runs the instrumented plan,
   * prints the number of results, and then prints the statistics gathered by every wrapper.
   *
   * @param sc context handed to each [[DebugNode]] that is created
   */
  def debug(implicit sc: SparkContext): Unit = {
    // Ids of operators that already have a wrapper, so that `transform` does not wrap the
    // same operator again when it descends into the freshly created DebugNode's child.
    val seenIds = HashSet.empty[Long]
    val physicalPlan = query.queryExecution.executedPlan

    val instrumented = physicalPlan transform {
      case node: SparkPlan if !seenIds(node.id) =>
        seenIds += node.id
        DebugNode(sc, node)
    }

    val resultCount = instrumented.execute().count()
    println(s"Results returned: $resultCount")

    instrumented.foreach {
      case wrapper: DebugNode => wrapper.dumpStats()
      case _ =>
    }
  }
}

/**
 * Operator that forwards the rows of `child` unchanged while recording, through accumulators,
 * how many tuples were produced and which runtime classes were observed in each column.
 *
 * @param sparkContext context used to create the accumulators; marked transient
 * @param child the operator whose output is being inspected
 */
private[sql] case class DebugNode(
    @transient sparkContext: SparkContext,
    child: SparkPlan) extends UnaryNode {
  def references = Set.empty

  def output = child.output

  /** Accumulator parameter that merges sets of class names by in-place union. */
  implicit object SetAccumulatorParam extends AccumulatorParam[HashSet[String]] {
    /**
     * Returns a new empty set. The argument is deliberately left untouched: clearing and
     * returning it would mutate the caller's set and make the initial value and the zero
     * value share a single mutable instance.
     */
    def zero(initialValue: HashSet[String]): HashSet[String] = HashSet.empty[String]

    /** Adds every element of `v2` to `v1` and returns `v1`. */
    def addInPlace(v1: HashSet[String], v2: HashSet[String]): HashSet[String] = {
      v1 ++= v2
      v1
    }
  }

  /**
   * Statistics collected for a single output column.
   *
   * @param elementTypes accumulator of the class names of the values seen in the column
   */
  case class ColumnStat(
      elementTypes: Accumulator[HashSet[String]] = sparkContext.accumulator(HashSet.empty))

  /** Number of rows that have passed through this operator. */
  val tupleCount = sparkContext.accumulator[Int](0)

  val numColumns = child.output.size

  /** One [[ColumnStat]] per output column, in the same order as `child.output`. */
  val columnStats = Array.fill(numColumns)(new ColumnStat())

  /** Prints the tuple count and, for each output column, the set of observed class names. */
  def dumpStats(): Unit = {
    println(s"== ${child.simpleString} ==")
    println(s"Tuples output: ${tupleCount.value}")
    child.output.zip(columnStats).foreach { case (attr, stat) =>
      val actualDataTypes = stat.elementTypes.value.mkString("{", ",", "}")
      println(s" ${attr.name} ${attr.dataType}: $actualDataTypes")
    }
  }

  /**
   * Executes the child and wraps each partition's iterator so that every row pulled through it
   * updates the tuple counter and the per-column type sets before being returned as-is.
   */
  def execute() = {
    child.execute().mapPartitions { iter =>
      new Iterator[Row] {
        def hasNext = iter.hasNext
        def next() = {
          val currentRow = iter.next()
          tupleCount += 1
          var i = 0
          while (i < numColumns) {
            val value = currentRow(i)
            // A null column value has no runtime class; skip it instead of throwing a
            // NullPointerException from getClass.
            if (value != null) {
              columnStats(i).elementTypes += HashSet(value.getClass.getName)
            }
            i += 1
          }
          currentRow
        }
      }
    }
  }
}
}

0 comments on commit c9dded2

Please sign in to comment.