Skip to content

Commit

Permalink
[SYCL][CUDA][HIP] Support zero-range kernels for the CUDA and HIP backends (#7044)
Browse files Browse the repository at this point in the history

- Fixes issue [#6963](#6963) by allowing kernels with a zero-sized range to be
  enqueued on the CUDA and HIP backends.

Co-authored-by: Romanov Vlad <vlad.romanov@intel.com>
  • Loading branch information
mmoadeli and romanovvlad authored Nov 10, 2022
1 parent 2117657 commit a395886
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
5 changes: 5 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2988,6 +2988,11 @@ pi_result cuda_piEnqueueKernelLaunch(
assert(work_dim > 0);
assert(work_dim < 4);

if (*global_work_size == 0) {
return cuda_piEnqueueEventsWaitWithBarrier(
command_queue, num_events_in_wait_list, event_wait_list, event);
}

// Set the number of threads per block to the number of threads per warp
// by default unless user has provided a better number
size_t threadsPerBlock[3] = {32u, 1u, 1u};
Expand Down
5 changes: 5 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2758,6 +2758,11 @@ pi_result hip_piEnqueueKernelLaunch(
assert(work_dim > 0);
assert(work_dim < 4);

if (*global_work_size == 0) {
return hip_piEnqueueEventsWaitWithBarrier(
command_queue, num_events_in_wait_list, event_wait_list, event);
}

// Set the number of threads per block to the number of threads per warp
// by default unless user has provided a better number
size_t threadsPerBlock[3] = {32u, 1u, 1u};
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1934,7 +1934,7 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel,
if (NDR.GlobalSize[0] != 0)
return; // GlobalSize is set - no need to adjust
// check the prerequisites:
assert(NDR.NumWorkGroups[0] != 0 && NDR.LocalSize[0] == 0);
assert(NDR.LocalSize[0] == 0);
// TODO might be good to cache this info together with the kernel info to
// avoid get_kernel_work_group_info on every kernel run
range<3> WGSize = get_kernel_device_specific_info<
Expand Down

0 comments on commit a395886

Please sign in to comment.