Skip to content

Commit

Permalink
Merge pull request #5293 from aurianer/fix_clang11_cuda_future_test
Browse files Browse the repository at this point in the history
Fix Clang 11 cuda_future test bug
  • Loading branch information
msimberg authored Apr 20, 2021
2 parents 84cf823 + e967a57 commit b01c5bc
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
1 change: 0 additions & 1 deletion .jenkins/cscs/env-clang-cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ export HWLOC_ROOT="${APPS_ROOT}/hwloc-2.0.3-gcc-8.3.0"
module load daint-gpu
module load cudatoolkit/10.2.89_3.29-7.0.2.1_3.5__g67354b4
module load Boost/1.75.0-CrayCCE-20.11
module switch cce cce/10.0.2
spack load cmake
spack load ninja

Expand Down
14 changes: 6 additions & 8 deletions libs/full/async_cuda/tests/unit/cuda_future.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@
template <typename T>
extern void cuda_trivial_kernel(T, cudaStream_t stream);

extern __global__ void saxpy(int n, float a, float* x, float* y);
// The saxpy device kernel has to be launched from the .cu file, as the symbol
// changes from saxpy to __device_stub__saxpy when moving from Clang 10 to Clang 11
extern void launch_saxpy_kernel(
hpx::cuda::experimental::cuda_executor& cudaexec, unsigned int& blocks,
unsigned int& threads, void** args);
// -------------------------------------------------------------------------
int test_saxpy(hpx::cuda::experimental::cuda_executor& cudaexec)
{
Expand Down Expand Up @@ -77,13 +81,7 @@ int test_saxpy(hpx::cuda::experimental::cuda_executor& cudaexec)

// now launch a kernel on the stream
void* args[] = {&N, &ratio, &d_A, &d_B};
#ifdef HPX_HAVE_HIP
hpx::apply(cudaexec, cudaLaunchKernel,
#else
hpx::apply(cudaexec, cudaLaunchKernel<void>,
#endif
reinterpret_cast<const void*>(&saxpy), dim3(blocks), dim3(threads),
args, std::size_t(0));
launch_saxpy_kernel(cudaexec, blocks, threads, args);

// finally, perform a copy from the gpu back to the cpu all on the same stream
// grab a future to when this completes
Expand Down
19 changes: 19 additions & 0 deletions libs/full/async_cuda/tests/unit/saxpy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,30 @@
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/local/future.hpp>

#include <hpx/async_cuda/cuda_executor.hpp>
#include <hpx/async_cuda/custom_gpu_api.hpp>

#include <cstddef>

// SAXPY kernel: each thread handles at most one element and computes
// y[i] = a * x[i] + y[i], where i is the thread's flat index along the
// x dimension of the launch (blockIdx.x * blockDim.x + threadIdx.x).
//
//   n  number of elements to process; threads whose index is not below n
//      return without reading or writing anything
//   a  scalar multiplier applied to x
//   x  input values (read only by this kernel)
//   y  values updated in place
//
// NOTE(review): x and y are presumably device-accessible buffers holding at
// least n floats each -- confirm against the caller.
__global__ void saxpy(int n, float a, float* x, float* y)
{
    int const idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Surplus threads at the tail of the grid have nothing to do.
    if (idx >= n)
        return;

    y[idx] = a * x[idx] + y[idx];
}

// Host-side helper that launches the saxpy kernel through the given HPX CUDA
// executor using hpx::apply (the call does not return a future here).
//
//   cudaexec  executor the launch is dispatched through
//   blocks    number of blocks; used to build a one-dimensional dim3 grid
//   threads   threads per block; used to build a one-dimensional dim3 block
//   args      array of pointers to the kernel arguments, forwarded unchanged
//             to the runtime launch function
//
// NOTE(review): the stream argument of the runtime launch function is not
// passed here; presumably the executor supplies it -- confirm against
// cuda_executor.
void launch_saxpy_kernel(hpx::cuda::experimental::cuda_executor& cudaexec,
    unsigned int& blocks, unsigned int& threads, void** args)
{
    // The runtime launch function is bound to a local first: invoking
    // hpx::apply with cudaLaunchKernel<void> directly results in an error
    // for NVCC with the gcc configuration.
#ifdef HPX_HAVE_HIP
    auto launch_kernel = cudaLaunchKernel;
#else
    auto launch_kernel = cudaLaunchKernel<void>;
#endif

    // The runtime API takes the kernel as an untyped function address.
    const void* const kernel_address = reinterpret_cast<const void*>(&saxpy);
    dim3 const grid_dim(blocks);
    dim3 const block_dim(threads);
    // No dynamic shared memory is requested for this launch.
    std::size_t const shared_mem_bytes = 0;

    hpx::apply(cudaexec, launch_kernel, kernel_address, grid_dim, block_dim,
        args, shared_mem_bytes);
}

0 comments on commit b01c5bc

Please sign in to comment.