Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the aggregate API [databricks] #3910

Merged
merged 6 commits into from
Oct 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,12 @@ case class GpuApproximatePercentile (
"approx_percentile does not support reduction")

// the update expression will create a t-digest (List[Struct[Double, Double]])
override lazy val updateExpressions: Seq[Expression] =
new CudfTDigest(inputBuf,
percentageExpression,
accuracyExpression) :: Nil
override lazy val updateAggregates: Seq[CudfAggregate] =
new CudfTDigestUpdate(accuracyExpression) :: Nil

// the merge expression will merge t-digests
override lazy val mergeExpressions: Seq[Expression] =
new CudfTDigest(outputBuf,
percentageExpression,
accuracyExpression) :: Nil
override lazy val mergeAggregates: Seq[CudfAggregate] =
new CudfTDigestMerge(accuracyExpression) :: Nil

// the evaluate expression will compute percentiles based on a t-digest
override lazy val evaluateExpression: Expression = {
Expand Down Expand Up @@ -181,32 +177,25 @@ case class ApproxPercentileFromTDigestExpr(
override def children: Seq[Expression] = Seq(child)
}

class CudfTDigest(
ref: Expression,
percentileExpr: GpuLiteral,
accuracyExpression: GpuLiteral)
extends CudfAggregate(ref) {
/**
 * cuDF aggregate used for the update step of a t-digest.
 *
 * The group-by form is built with `GroupByAggregation.createTDigest`, using the delta that
 * `CudfTDigest.accuracy` derives from `accuracyExpression`. The reduction form always throws
 * an `UnsupportedOperationException`.
 *
 * @param accuracyExpression literal holding the requested accuracy
 */
class CudfTDigestUpdate(accuracyExpression: GpuLiteral) extends CudfAggregate {

  override val name: String = "CudfTDigestUpdate"

  override def dataType: DataType = CudfTDigest.dataType

  // Group-by aggregation that creates a t-digest with the mapped delta.
  override lazy val groupByAggregate: GroupByAggregation = {
    val delta = CudfTDigest.accuracy(accuracyExpression)
    GroupByAggregation.createTDigest(delta)
  }

  // Reductions are rejected: invoking the returned function throws.
  override lazy val reductionAggregate: cudf.ColumnVector => cudf.Scalar = { _ =>
    throw new UnsupportedOperationException("TDigest is not yet supported in reduction")
  }
}

// Map Spark delta to cuDF delta
private lazy val accuracy = accuracyExpression.value match {
case delta: Int => delta.max(1000)
case _ => 1000
}
class CudfTDigestMerge(accuracyExpression: GpuLiteral)
extends CudfAggregate {

override lazy val updateReductionAggregateInternal: cudf.ColumnVector => cudf.Scalar =
override lazy val reductionAggregate: cudf.ColumnVector => cudf.Scalar = _ =>
throw new UnsupportedOperationException("TDigest is not yet supported in reduction")
override lazy val mergeReductionAggregateInternal: cudf.ColumnVector => cudf.Scalar =
throw new UnsupportedOperationException("TDigest is not yet supported in reduction")
override lazy val updateAggregate: GroupByAggregationOnColumn =
GroupByAggregation.createTDigest(accuracy)
.onColumn(getOrdinal(ref))
override lazy val mergeAggregate: GroupByAggregationOnColumn =
GroupByAggregation.mergeTDigest(accuracy)
.onColumn(getOrdinal(ref))
override def toString(): String = "CudfTDigest"
override lazy val groupByAggregate: GroupByAggregation =
GroupByAggregation.mergeTDigest(CudfTDigest.accuracy(accuracyExpression))
override val name: String = "CudfTDigestMerge"
override def dataType: DataType = CudfTDigest.dataType
override def nullable: Boolean = false
override protected def otherCopyArgs: Seq[AnyRef] = Seq(percentileExpr, accuracyExpression)
}

object CudfTDigest {
Expand All @@ -218,4 +207,10 @@ object CudfTDigest {
StructField("min", DataTypes.DoubleType, nullable = false),
StructField("max", DataTypes.DoubleType, nullable = false)
))

// Map the Spark accuracy literal to the delta passed to cuDF.
// Int values are raised to at least 1000; any other literal value yields 1000.
def accuracy(accuracyExpression: GpuLiteral): Int = {
  val minimumDelta = 1000
  accuracyExpression.value match {
    case requested: Int => math.max(requested, minimumDelta)
    case _ => minimumDelta
  }
}
}
Loading