Skip to content

Commit

Permalink
Merge pull request #5143 from jlowe/fix-merge
Browse files Browse the repository at this point in the history
Fix merge conflict with branch-22.04
  • Loading branch information
pxLi authored Apr 6, 2022
2 parents f3a0d2c + 5fd3798 commit 81dbb75
Showing 1 changed file with 20 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package com.nvidia.spark.rapids
import scala.annotation.tailrec
import scala.collection.mutable.Queue

import ai.rapids.cudf.{HostColumnVector, NvtxColor, Table}
import ai.rapids.cudf.{Cuda, HostColumnVector, NvtxColor, Table}
import com.nvidia.spark.rapids.shims.ShimUnaryExecNode

import org.apache.spark.TaskContext
Expand Down Expand Up @@ -340,6 +340,19 @@ case class GpuColumnarToRowExec(
}

object GpuColumnarToRowExec {
/**
 * Reports whether the GPU accelerated column->row transpose may be used on the
 * current CUDA device.
 *
 * This guards against [[https://github.com/rapidsai/cudf/issues/10569]], in which
 * the CUDF JNI column->row transposition works incorrectly on certain GPU
 * architectures. Being a `lazy val`, the device is queried on first access only
 * and the answer is reused afterwards.
 */
private lazy val isAcceleratedTransposeSupported: Boolean = {
  // Pascal corresponds to CUDA compute capability 6.x; only devices whose major
  // version is beyond that are treated as supported.
  // Reference: https://developer.nvidia.com/cuda-gpus
  val pascalMajorVersion = 6
  val deviceMajorVersion = Cuda.getComputeCapabilityMajor
  deviceMajorVersion > pascalMajorVersion
}

def makeIteratorFunc(
output: Seq[Attribute],
numOutputRows: GpuMetric,
Expand All @@ -353,7 +366,12 @@ object GpuColumnarToRowExec {
// This number includes the 1-bit validity per column, but doesn't include padding.
// We are being conservative by only allowing 100M columns until we feel the need to
// increase this number
output.length <= 100000000) {
output.length <= 100000000 &&
// Work around {@link https://github.com/rapidsai/cudf/issues/10569}, where CUDF JNI
// acceleration of column->row transposition produces incorrect results on certain
// GPU architectures.
// Check that the accelerated transpose works correctly on the current CUDA device.
isAcceleratedTransposeSupported) {
(batches: Iterator[ColumnarBatch]) => {
// UnsafeProjection is not serializable so do it on the executor side
val toUnsafe = UnsafeProjection.create(output, output)
Expand Down

0 comments on commit 81dbb75

Please sign in to comment.