diff --git a/.jenkins/cscs/env-clang-cuda.sh b/.jenkins/cscs/env-clang-cuda.sh index 574551ba91c3..51a4aab1b97b 100644 --- a/.jenkins/cscs/env-clang-cuda.sh +++ b/.jenkins/cscs/env-clang-cuda.sh @@ -14,7 +14,6 @@ export HWLOC_ROOT="${APPS_ROOT}/hwloc-2.0.3-gcc-8.3.0" module load daint-gpu module load cudatoolkit/10.2.89_3.29-7.0.2.1_3.5__g67354b4 module load Boost/1.75.0-CrayCCE-20.11 -module switch cce cce/10.0.2 spack load cmake spack load ninja diff --git a/libs/full/async_cuda/tests/unit/cuda_future.cpp b/libs/full/async_cuda/tests/unit/cuda_future.cpp index 31ee39806bda..9e14adfe2a49 100644 --- a/libs/full/async_cuda/tests/unit/cuda_future.cpp +++ b/libs/full/async_cuda/tests/unit/cuda_future.cpp @@ -36,7 +36,11 @@ template extern void cuda_trivial_kernel(T, cudaStream_t stream); -extern __global__ void saxpy(int n, float a, float* x, float* y); +// Need to move the call to the saxpy device kernel into the .cu file, as the symbol changes +// from saxpy to __device_stub__saxpy when moving from Clang 10 to Clang 11 +extern void launch_saxpy_kernel( + hpx::cuda::experimental::cuda_executor& cudaexec, unsigned int& blocks, + unsigned int& threads, void** args); // ------------------------------------------------------------------------- int test_saxpy(hpx::cuda::experimental::cuda_executor& cudaexec) { @@ -77,13 +81,7 @@ int test_saxpy(hpx::cuda::experimental::cuda_executor& cudaexec) // now launch a kernel on the stream void* args[] = {&N, &ratio, &d_A, &d_B}; -#ifdef HPX_HAVE_HIP - hpx::apply(cudaexec, cudaLaunchKernel, -#else - hpx::apply(cudaexec, cudaLaunchKernel, -#endif - reinterpret_cast(&saxpy), dim3(blocks), dim3(threads), - args, std::size_t(0)); + launch_saxpy_kernel(cudaexec, blocks, threads, args); // finally, perform a copy from the gpu back to the cpu all on the same stream // grab a future to when this completes diff --git a/libs/full/async_cuda/tests/unit/saxpy.cu b/libs/full/async_cuda/tests/unit/saxpy.cu index 6245e8acb3b8..206ec641c674 100644 --- 
a/libs/full/async_cuda/tests/unit/saxpy.cu +++ b/libs/full/async_cuda/tests/unit/saxpy.cu @@ -4,11 +4,30 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#include + +#include #include + +#include + + __global__ void saxpy(int n, float a, float* x, float* y) { int i = blockIdx.x * blockDim.x + threadIdx.x; if (i < n) y[i] = a * x[i] + y[i]; } + +void launch_saxpy_kernel(hpx::cuda::experimental::cuda_executor& cudaexec, + unsigned int& blocks, unsigned int& threads, void** args) +{ + // Invoking hpx::apply with cudaLaunchKernel<void> directly results in an + // error for NVCC with gcc configuration, so bind it to a local first +#ifdef HPX_HAVE_HIP + auto launch_kernel = cudaLaunchKernel; +#else + auto launch_kernel = cudaLaunchKernel<void>; +#endif + hpx::apply(cudaexec, launch_kernel, reinterpret_cast<const void*>(&saxpy), + dim3(blocks), dim3(threads), args, std::size_t(0)); +}