diff --git a/include/xgboost/span.h b/include/xgboost/span.h index 2bb9945eb0ab..0b543b5372c2 100644 --- a/include/xgboost/span.h +++ b/include/xgboost/span.h @@ -81,25 +81,26 @@ namespace common { #if defined(_MSC_VER) // Windows CUDA doesn't have __assert_fail. -#define KERNEL_CHECK(cond) \ - do { \ - if (XGBOOST_EXPECT(!(cond), false)) { \ - asm("trap;"); \ - } \ +#define CUDA_KERNEL_CHECK(cond) \ + do { \ + if (XGBOOST_EXPECT(!(cond), false)) { \ + asm("trap;"); \ + } \ } while (0) #else // defined(_MSC_VER) #define __ASSERT_STR_HELPER(x) #x -#define KERNEL_CHECK(cond) \ - (XGBOOST_EXPECT((cond), true) \ - ? static_cast(0) \ - : __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, \ - __PRETTY_FUNCTION__)) +#define CUDA_KERNEL_CHECK(cond) \ + (XGBOOST_EXPECT((cond), true) \ + ? static_cast(0) \ + : __assert_fail(__ASSERT_STR_HELPER((cond)), __FILE__, __LINE__, __PRETTY_FUNCTION__)) #endif // defined(_MSC_VER) +#define KERNEL_CHECK CUDA_KERNEL_CHECK + #define SPAN_CHECK KERNEL_CHECK #else // ------------------------------ not CUDA ---------------------------- @@ -120,11 +121,7 @@ namespace common { #endif // __CUDA_ARCH__ -#if defined(__CUDA_ARCH__) -#define SPAN_LT(lhs, rhs) KERNEL_CHECK((lhs) < (rhs)) -#else -#define SPAN_LT(lhs, rhs) KERNEL_CHECK((lhs) < (rhs)) -#endif // defined(__CUDA_ARCH__) +#define SPAN_LT(lhs, rhs) SPAN_CHECK((lhs) < (rhs)) namespace detail { /*! @@ -671,7 +668,6 @@ XGBOOST_DEVICE auto as_writable_bytes(Span s) __span_noexcept -> // NOLIN Span::value> { return {reinterpret_cast(s.data()), s.size_bytes()}; } - } // namespace common } // namespace xgboost diff --git a/src/tree/gpu_hist/row_partitioner.cuh b/src/tree/gpu_hist/row_partitioner.cuh index c236b90090b6..1b5a5222229e 100644 --- a/src/tree/gpu_hist/row_partitioner.cuh +++ b/src/tree/gpu_hist/row_partitioner.cuh @@ -120,7 +120,8 @@ class RowPartitioner { int64_t* d_left_count = left_counts_.data().get() + nidx; // Launch 1 thread for each row - dh::LaunchN<1, 128>(segment.Size(), [=] __device__(size_t idx) { + dh::LaunchN<1, 128>(segment.Size(), [segment, op, left_nidx, right_nidx, d_ridx, d_left_count, + d_position] __device__(size_t idx) { // LaunchN starts from zero, so we restore the row index by adding segment.begin idx += segment.begin; RowIndexT ridx = d_ridx[idx];