Skip to content

Commit

Permalink
Relocate __lazy_ctor_storage to utils header (#1769)
Browse files Browse the repository at this point in the history
Co-authored-by:  Dan Hoeflinger <dan.hoeflinger@intel.com>
  • Loading branch information
adamfidel and danhoeflinger authored Aug 8, 2024
1 parent c9a1f03 commit 94d12d2
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,6 @@ class __reduce_mid_work_group_kernel;
// Forward declaration of a variadic class template; no definition is visible in this excerpt.
// NOTE(review): presumably used only as a kernel-name tag, like the declaration preceding it - confirm.
template <typename... _Name>
class __reduce_kernel;

// Storage helper since _Tp may not have a default constructor.
// The union never constructs or destroys __v on its own: the user constructs the value in place (e.g. with
// placement new on &__v) and must explicitly call its destructor before the storage goes out of scope.
template <typename _Tp>
union __lazy_ctor_storage
{
    _Tp __v;

    // Intentionally empty: leaves __v uninitialized until the user constructs it.
    __lazy_ctor_storage() {}

    // Intentionally empty: the user is responsible for destroying __v explicitly, so nothing is done here (this
    // also avoids double destruction). Without this user-provided destructor the union's destructor would be
    // implicitly deleted whenever _Tp has a non-trivial destructor, making local variables of this type ill-formed.
    ~__lazy_ctor_storage() {}
};

// Adjust number of sequential operations per work-item based on the vector size. Single elements are kept to
// improve performance of small arrays or remainder loops.
template <std::uint8_t _VecSize, typename _Size>
Expand All @@ -76,7 +68,7 @@ __work_group_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Si
{
auto __local_idx = __item_id.get_local_id(0);
const _Size __group_size = __item_id.get_local_range().size();
__lazy_ctor_storage<_Tp> __result;
oneapi::dpl::__internal::__lazy_ctor_storage<_Tp> __result;
// 1. Initialization (transform part). Fill local memory
__transform_pattern(__item_id, __n, __iters_per_work_item, /*global_offset*/ (_Size)0, __is_full,
/*__n_groups*/ (_Size)1, __result, __acc...);
Expand All @@ -89,7 +81,7 @@ __work_group_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Si
__reduce_pattern.apply_init(__init, __result.__v);
__res_ptr[0] = __result.__v;
}
__result.__v.~_Tp();
__result.__destroy();
}

// Device kernel that transforms and reduces __n elements to the number of work groups preliminary results.
Expand All @@ -104,7 +96,7 @@ __device_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Size _
auto __local_idx = __item_id.get_local_id(0);
auto __group_idx = __item_id.get_group(0);
const _Size __group_size = __item_id.get_local_range().size();
__lazy_ctor_storage<_Tp> __result;
oneapi::dpl::__internal::__lazy_ctor_storage<_Tp> __result;
// 1. Initialization (transform part). Fill local memory
__transform_pattern(__item_id, __n, __iters_per_work_item, /*global_offset*/ (_Size)0, __is_full, __n_groups,
__result, __acc...);
Expand All @@ -114,7 +106,7 @@ __device_reduce_kernel(const _NDItemId __item_id, const _Size __n, const _Size _
__result.__v = __reduce_pattern(__item_id, __n_items, __result.__v, __local_mem);
if (__local_idx == 0)
__temp_acc[__group_idx] = __result.__v;
__result.__v.~_Tp();
__result.__destroy();
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -394,7 +386,7 @@ struct __parallel_transform_reduce_impl
// 1. Initialization (transform part). Fill local memory
_Size __n_items;
const bool __is_full = __n == __size_per_work_group * __n_groups;
__lazy_ctor_storage<_Tp> __result;
oneapi::dpl::__internal::__lazy_ctor_storage<_Tp> __result;
if (__is_first)
{
__transform_pattern1(__item_id, __n, __iters_per_work_item, /*global_offset*/ (_Size)0,
Expand All @@ -420,7 +412,7 @@ struct __parallel_transform_reduce_impl

__temp_ptr[__offset_1 + __group_idx] = __result.__v;
}
__result.__v.~_Tp();
__result.__destroy();
});
});
__is_first = false;
Expand Down
6 changes: 3 additions & 3 deletions include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ struct transform_reduce
void
vectorized_reduction_first(const _Size __start_idx, _Res& __res, const _Acc&... __acc) const
{
new (&__res.__v) _Tp(__unary_op(__start_idx, __acc...));
__res.__setup(__unary_op(__start_idx, __acc...));
_ONEDPL_PRAGMA_UNROLL
for (_Size __i = 1; __i < _VecSize; ++__i)
__res.__v = __binary_op(__res.__v, __unary_op(__start_idx + __i, __acc...));
Expand Down Expand Up @@ -251,7 +251,7 @@ struct transform_reduce
return;
if (__iters_per_work_item == 1)
{
new (&__res.__v) _Tp(__unary_op(__global_idx, __acc...));
__res.__setup(__unary_op(__global_idx, __acc...));
return;
}
const _Size __local_range = __item_id.get_local_range(0);
Expand Down Expand Up @@ -318,7 +318,7 @@ struct transform_reduce
// Scalar remainder
else if (__adjusted_global_id < __adjusted_n)
{
new (&__res.__v) _Tp(__unary_op(__adjusted_global_id, __acc...));
__res.__setup(__unary_op(__adjusted_global_id, __acc...));
const _Size __adjusted_global_id_plus_one = __adjusted_global_id + 1;
scalar_reduction_remainder(__adjusted_global_id_plus_one, __adjusted_n, __res, __acc...);
}
Expand Down
21 changes: 21 additions & 0 deletions include/oneapi/dpl/pstl/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,27 @@ struct __is_iterator_type<_T, std::void_t<typename std::iterator_traits<_T>::dif
// Convenience variable template: shorthand for __is_iterator_type<_T>::value.
template <typename _T>
static constexpr bool __is_iterator_type_v = __is_iterator_type<_T>::value;

// Storage helper since _Tp may not have a default constructor.
// The union provides storage for a _Tp without constructing it. The lifetime of the stored value is managed
// manually: call __setup() to construct it and __destroy() to end its lifetime. The union itself never
// constructs or destroys __v.
template <typename _Tp>
union __lazy_ctor_storage
{
    using __value_type = _Tp;
    _Tp __v;

    // Intentionally empty: leaves __v uninitialized until __setup() is called.
    __lazy_ctor_storage() {}

    // Intentionally empty: the stored value must be destroyed explicitly through __destroy(), so nothing is done
    // here (this also avoids double destruction). Without this user-provided destructor the union's destructor
    // would be implicitly deleted whenever _Tp has a non-trivial destructor, making local variables of this type
    // ill-formed.
    ~__lazy_ctor_storage() {}

    // Constructs the stored value in place from __init (perfectly forwarded to _Tp's constructor).
    // Must not be called while a previously constructed value is still alive.
    template <typename _U>
    void
    __setup(_U&& __init)
    {
        new (&__v) _Tp(std::forward<_U>(__init));
    }

    // Ends the lifetime of the stored value. Must only be called after __setup() (or an equivalent in-place
    // construction of __v).
    void
    __destroy()
    {
        __v.~_Tp();
    }
};

} // namespace __internal
} // namespace dpl
} // namespace oneapi
Expand Down

0 comments on commit 94d12d2

Please sign in to comment.