Skip to content

Commit

Permalink
[SPARK-14356] Update spark.sql.execution.debug to work on Datasets
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Update DebugQuery to work on Datasets of any type, not just DataFrames.

## How was this patch tested?

Added unit tests and verified the behavior manually in spark-shell.

Author: Matei Zaharia <matei@databricks.com>

Closes #12140 from mateiz/debug-dataset.
  • Loading branch information
mateiz authored and rxin committed Apr 4, 2016
1 parent 3f749f7 commit 76f3c73
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ package object debug {
}

/**
* Augments [[DataFrame]]s with debug methods.
* Augments [[Dataset]]s with debug methods.
*/
implicit class DebugQuery(query: DataFrame) extends Logging {
implicit class DebugQuery(query: Dataset[_]) extends Logging {
def debug(): Unit = {
val plan = query.queryExecution.executedPlan
val visited = new collection.mutable.HashSet[TreeNodeRef]()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,19 @@ package org.apache.spark.sql.execution.debug

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.test.SQLTestData.TestData

class DebuggingSuite extends SparkFunSuite with SharedSQLContext {

// Smoke test: invokes debug() on the `testData` fixture. There are no
// assertions, so the test passes as long as the call completes without throwing.
test("DataFrame.debug()") {
testData.debug()
}

// Smoke test: converts the `testData` fixture to a typed Dataset[TestData] via
// as[TestData] and invokes debug() on it. There are no assertions, so the test
// passes as long as the call completes without throwing.
test("Dataset.debug()") {
// presumably supplies the implicit Encoder that as[TestData] needs — confirm against testImplicits
import testImplicits._
testData.as[TestData].debug()
}

test("debugCodegen") {
val res = codegenString(sqlContext.range(10).groupBy("id").count().queryExecution.executedPlan)
assert(res.contains("Subtree 1 / 2"))
Expand Down

0 comments on commit 76f3c73

Please sign in to comment.