From b9e1385a2ac58df4319239be7f53c5b391612352 Mon Sep 17 00:00:00 2001 From: samnordmann Date: Thu, 26 Jan 2023 19:48:48 +0100 Subject: [PATCH] EC/CUDA: remove if constexpr (#718) * EC/CUDA: remove if constexpr * EC/CUDA: remove constexpr specifiers --- .../ec/cuda/kernel/ec_cuda_reduce_ops.h | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/components/ec/cuda/kernel/ec_cuda_reduce_ops.h b/src/components/ec/cuda/kernel/ec_cuda_reduce_ops.h index 49f216f75c..5f7874324a 100644 --- a/src/components/ec/cuda/kernel/ec_cuda_reduce_ops.h +++ b/src/components/ec/cuda/kernel/ec_cuda_reduce_ops.h @@ -84,7 +84,7 @@ cuFloatComplex operator* (const cuFloatComplex & first, } \ _Pragma("unroll") for (i = 0; i < unroll; i++) \ { \ - if constexpr (strided) { \ + if (strided) { \ tmp2[i] = s2[line + warp_size * i + j * ld]; \ } else { \ tmp2[i] = s[1 + j][line + warp_size * i]; \ @@ -115,30 +115,35 @@ cuFloatComplex operator* (const cuFloatComplex & first, { \ _Type * d = (_Type *)task.dst; \ const size_t count = task.count; \ - constexpr bool strided = \ + const bool strided = \ std::is_same<_TaskType, ucc_eee_task_reduce_strided_t>::value; \ - constexpr int MAXSRCS = \ + const int MAXSRCS = \ strided ? USHRT_MAX : UCC_EE_EXECUTOR_NUM_BUFS; \ - constexpr int ALLOC_SIZE = strided ? 1 : UCC_EE_EXECUTOR_NUM_BUFS; \ + const int ALLOC_SIZE = strided ? 1 : UCC_EE_EXECUTOR_NUM_BUFS; \ _Type * s[ALLOC_SIZE]; \ _Type * s1; \ _Type * s2; \ __shared__ uint16_t n_src2; \ size_t ld; \ size_t i, j, line; \ - if constexpr (strided) { \ - n_src2 = task.n_src2; \ - s1 = (_Type *)task.src1; \ - s2 = (_Type *)task.src2; \ - ld = task.stride / sizeof(_Type); \ - ucc_assert_system(task.stride % sizeof(_Type) == 0); \ + if (strided) { \ + auto task_strided_p = \ + reinterpret_cast(&task); \ + n_src2 = task_strided_p->n_src2; \ + s1 = (_Type *)task_strided_p->src1; \ + s2 = (_Type *)task_strided_p->src2; \ + ld = task_strided_p->stride / sizeof(_Type); \ + ucc_assert_system(task_strided_p->stride % sizeof(_Type) == 0); \ } else { \ - memcpy(s, task.srcs, UCC_EE_EXECUTOR_NUM_BUFS * sizeof(_Type *)); \ - n_src2 = task.n_srcs - 1; \ + auto task_default_p = \ + reinterpret_cast(&task); \ + memcpy(s, task_default_p->srcs, \ + UCC_EE_EXECUTOR_NUM_BUFS * sizeof(_Type *)); \ + n_src2 = task_default_p->n_srcs - 1; \ s1 = s[0]; \ } \ CUDA_REDUCE_WITH_OP_CHUNK(0, UNROLL, WARP_SIZE, _OP); \ - /* second call for data remainder */\ + /* second call for data remainder */ \ CUDA_REDUCE_WITH_OP_CHUNK( \ (count / (WARP_SIZE * UNROLL)) * (WARP_SIZE * UNROLL), 1, 1, _OP); \ } \