diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 7027f792b40..3d7a45950fd 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -694,7 +694,7 @@ __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__scan_single_wg_kernel< ::std::integral_constant<::std::uint16_t, __wg_size>, ::std::integral_constant<::std::uint16_t, __num_elems_per_item>, _BinaryOperation, - /* _IsFullGroup= */ ::std::false_type, _Inclusive, _CustomName>>>()( + /* _IsFullGroup= */ ::std::false_type, _Inclusive, _TempStorage, _CustomName>>>()( ::std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n, __init, __binary_op, __unary_op); return __future(__event, __dummy_result_and_scratch); @@ -873,11 +873,10 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen } } oneapi::dpl::__par_backend_hetero::__gen_transform_input<_UnaryOperation> __gen_transform{__unary_op}; - return __future(__parallel_transform_reduce_then_scan( + return __parallel_transform_reduce_then_scan( __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng), std::forward<_Range2>(__out_rng), __gen_transform, __binary_op, __gen_transform, - oneapi::dpl::__internal::__no_op{}, __simple_write_to_idx{}, __init, _Inclusive{}) - .event()); + oneapi::dpl::__internal::__no_op{}, __simple_write_to_idx{}, __init, _Inclusive{}); } else { @@ -890,7 +889,7 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen _NoAssign __no_assign_op; _NoOpFunctor __get_data_op; - return __future( + return __parallel_transform_scan_base( __backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng), 
std::forward<_Range2>(__out_rng), __binary_op, __init, @@ -903,8 +902,7 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen _NoAssign, _Assigner, _NoOpFunctor, unseq_backend::__no_init_value<_Type>>{ __binary_op, _NoOpFunctor{}, __no_assign_op, __assign_op, __get_data_op}, // global scan - unseq_backend::__global_scan_functor<_Inclusive, _BinaryOperation, _InitType>{__binary_op, __init}) - .event()); + unseq_backend::__global_scan_functor<_Inclusive, _BinaryOperation, _InitType>{__binary_op, __init}); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h index b553f6cfe8c..6dad5c1f623 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h @@ -753,7 +753,7 @@ __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_ //We need temporary storage for reductions of each sub-group (__num_sub_groups_global), and also 2 for the // block carry-out. We need two for the block carry-out to prevent a race condition between reading and writing // the block carry-out within a single kernel. - __result_and_scratch_storage<_ExecutionPolicy, _ValueType> __result_and_scratch{__exec, + __result_and_scratch_storage<std::decay_t<_ExecutionPolicy>, _ValueType> __result_and_scratch{__exec, __num_sub_groups_global + 2}; // Reduce and scan step implementations