Skip to content

Commit

Permalink
partition + unique patterns; ranges API
Browse files (browse the repository at this point in the history)
Signed-off-by: Dan Hoeflinger <dan.hoeflinger@intel.com>
  • Loading branch information
danhoeflinger committed Jul 22, 2024
1 parent f0d0c68 commit 7ad26d7
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 149 deletions.
25 changes: 9 additions & 16 deletions include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -886,10 +886,10 @@ __pattern_mismatch(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterat
//------------------------------------------------------------------------

template <typename _BackendTag, typename _ExecutionPolicy, typename _Iterator1, typename _IteratorOrTuple,
typename _CreateMaskOp, typename _CopyByMaskOp>
typename _GenMask, typename _WriteOp>
::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type>
__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last,
_IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op)
_IteratorOrTuple __output_first, _GenMask __gen_mask, _WriteOp __write_op)
{
using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type;

Expand All @@ -904,9 +904,9 @@ __pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Itera
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>();
auto __buf2 = __keep2(__output_first, __output_first + __n);

auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
__buf1.all_view(), __buf2.all_view(), __n, __create_mask_op,
__copy_by_mask_op);
auto __res =
__par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
__buf1.all_view(), __buf2.all_view(), __n, __gen_mask, __write_op);

::std::size_t __num_copied = __res.get();
return ::std::make_pair(__output_first + __n, __num_copied);
Expand Down Expand Up @@ -951,17 +951,14 @@ __pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e
return ::std::make_pair(__result1, __result2);

using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type;
using _ReduceOp = ::std::plus<_It1DifferenceType>;

unseq_backend::__create_mask<_UnaryPredicate, _It1DifferenceType> __create_mask_op{__pred};
unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}};

auto __result = __pattern_scan_copy(
__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last,
__par_backend_hetero::zip(
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1),
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)),
__create_mask_op, __copy_by_mask_op);
oneapi::dpl::__par_backend_hetero::__gen_mask<_UnaryPredicate>{__pred},
oneapi::dpl::__par_backend_hetero::__write_to_idx_if_else{});

return ::std::make_pair(__result1 + __result.second, __result2 + (__last - __first - __result.second));
}
Expand All @@ -977,14 +974,10 @@ __pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec
_Iterator2 __result_first, _BinaryPredicate __pred)
{
using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type;
unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign,
/*inclusive*/ ::std::true_type, 1>
__copy_by_mask_op;
__create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{
__not_pred<_BinaryPredicate>{__pred}};

auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last,
__result_first, __create_mask_op, __copy_by_mask_op);
__result_first, oneapi::dpl::__par_backend_hetero::__gen_unique_mask{},
oneapi::dpl::__par_backend_hetero::__write_to_idx_if{});

return __result_first + __result.second;
}
Expand Down
76 changes: 20 additions & 56 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,50 +334,20 @@ __pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& _
// copy_if
//------------------------------------------------------------------------

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _CreateMaskOp,
typename _CopyByMaskOp>
template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _GenMask,
typename _WriteOp>
oneapi::dpl::__internal::__difference_t<_Range1>
__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
_CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op)
_GenMask __gen_mask, _WriteOp __write_op)
{
if (__rng1.size() == 0)
return __rng1.size();

using _SizeType = decltype(__rng1.size());
using _ReduceOp = ::std::plus<_SizeType>;
using _Assigner = unseq_backend::__scan_assigner;
using _NoAssign = unseq_backend::__scan_no_assign;
using _MaskAssigner = unseq_backend::__mask_assigner<1>;
using _InitType = unseq_backend::__no_init_value<_SizeType>;
using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>;

_Assigner __assign_op;
_ReduceOp __reduce_op;
_DataAcc __get_data_op;
_MaskAssigner __add_mask_op;

oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __rng1.size());

auto __res =
__par_backend_hetero::__parallel_transform_scan_base(
_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::zip_view(
__rng1, oneapi::dpl::__ranges::all_view<int32_t, __par_backend_hetero::access_mode::read_write>(
__mask_buf.get_buffer())),
__rng2, __reduce_op, _InitType{},
// local scan
unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
_MaskAssigner, _CreateMaskOp, _InitType>{__reduce_op, __get_data_op, __assign_op,
__add_mask_op, __create_mask_op},
// scan between groups
unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
_Assigner, _DataAcc, _InitType>{__reduce_op, __get_data_op, _NoAssign{}, __assign_op,
__get_data_op},
// global scan
__copy_by_mask_op)
.get();
auto __n = __rng1.size();
if (__n == 0)
return 0;

return __res;
auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
std::forward<_Range1>(__rng1),
std::forward<_Range2>(__rng2), __n, __gen_mask, __write_op);
return __res.get();
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Predicate,
Expand All @@ -386,15 +356,15 @@ oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
_Predicate __pred, _Assign)
{
using _SizeType = decltype(__rng1.size());
using _ReduceOp = ::std::plus<_SizeType>;
auto __n = __rng1.size();
if (__n == 0)
return 0;

unseq_backend::__create_mask<_Predicate, _SizeType> __create_mask_op{__pred};
unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ ::std::true_type, 1> __copy_by_mask_op;
auto __res = oneapi::dpl::__par_backend_hetero::__parallel_copy_if(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1),
std::forward<_Range2>(__rng2), __n, __pred, _Assign{});

return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec),
::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2),
__create_mask_op, __copy_by_mask_op);
return __res.get(); //is a blocking call
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -435,15 +405,9 @@ oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result,
_BinaryPredicate __pred, _Assign)
{
using _It1DifferenceType = oneapi::dpl::__internal::__difference_t<_Range1>;
unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, _Assign, /*inclusive*/ ::std::true_type, 1>
__copy_by_mask_op;
__create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{
__not_pred<_BinaryPredicate>{__pred}};

return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec),
::std::forward<_Range1>(__rng), ::std::forward<_Range2>(__result),
__create_mask_op, __copy_by_mask_op);
return __pattern_scan_copy(__tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
std::forward<_Range2>(__result), oneapi::dpl::__par_backend_hetero::__gen_unique_mask{},
oneapi::dpl::__par_backend_hetero::__write_to_idx_if{});
}

//------------------------------------------------------------------------
Expand Down
120 changes: 71 additions & 49 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -815,19 +815,44 @@ struct __simple_write_to_idx
};

template <typename _Predicate>
struct __gen_count_pred
struct __gen_mask
{
template <typename _InRng>
bool
operator()(_InRng&& __in_rng, std::size_t __idx) const
{
return __pred(__in_rng[__idx]);
}
_Predicate __pred;
};

// Default comparison used by __gen_unique_mask: two neighbouring elements are
// considered different when operator!= says so.
struct __gen_unique_mask_not_equal
{
    template <typename _Lhs, typename _Rhs>
    bool
    operator()(_Lhs&& __lhs, _Rhs&& __rhs) const
    {
        return std::forward<_Lhs>(__lhs) != std::forward<_Rhs>(__rhs);
    }
};

// Mask generator for "unique"-style patterns: reports whether the element at
// __idx must be kept, i.e. whether it starts a new run of equivalent elements.
//
// _DiffersPred is a binary functor that returns true when its two arguments are
// NOT equivalent. It is invoked as __differs(current, previous). The default
// falls back to operator!=, so a default-constructed `__gen_unique_mask{}`
// behaves exactly like the former non-template version.
//
// NOTE(review): callers that receive a user-supplied equality predicate can only
// honour it by passing its negation here; a default-constructed mask silently
// ignores any such predicate.
template <typename _DiffersPred = __gen_unique_mask_not_equal>
struct __gen_unique_mask
{
    // __in_rng : indexable input range
    // __idx    : position of the element being tested
    // returns  : true for position 0, otherwise whether element __idx differs
    //            from element __idx - 1 according to __differs
    template <typename _InRng>
    bool
    operator()(_InRng&& __in_rng, std::size_t __idx) const
    {
        // The first element has no predecessor and is always kept; testing this
        // first also keeps __idx - 1 from wrapping around.
        if (__idx == 0)
            return true;
        return __differs(__in_rng[__idx], __in_rng[__idx - 1]);
    }
    _DiffersPred __differs;
};

template <typename _GenMask>
struct __gen_count_mask
{
template <typename _InRng, typename _SizeType>
_SizeType
operator()(_InRng&& __in_rng, _SizeType __idx) const
{
return __pred(__in_rng[__idx]) ? _SizeType{1} : _SizeType{0};
return __gen_mask(std::forward<_InRng>(__in_rng), __idx) ? _SizeType{1} : _SizeType{0};
}
_Predicate __pred;
_GenMask __gen_mask;
};

template <typename _Predicate>
struct __gen_expand_count_pred
template <typename _GenMask>
struct __gen_expand_count_mask
{
template <typename _InRng, typename _SizeType>
auto
Expand All @@ -839,10 +864,10 @@ struct __gen_expand_count_pred
using _ElementType =
oneapi::dpl::__internal::__decay_with_tuple_specialization_t<oneapi::dpl::__internal::__value_t<_InRng>>;
_ElementType ele = __in_rng[__idx];
bool mask = __pred(ele);
bool mask = __gen_mask(__in_rng, __idx);
return std::tuple(mask ? _SizeType{1} : _SizeType{0}, mask, ele);
}
_Predicate __pred;
_GenMask __gen_mask;
};

struct __get_zeroth_element
Expand All @@ -854,7 +879,7 @@ struct __get_zeroth_element
return std::get<0>(std::forward<_Tp>(__a));
}
};

template <typename Assign = oneapi::dpl::__internal::__pstl_assign>
struct __write_to_idx_if
{
template <typename _OutRng, typename _SizeType, typename ValueType>
Expand All @@ -867,8 +892,24 @@ struct __write_to_idx_if
typename oneapi::dpl::__internal::__get_tuple_type<std::decay_t<decltype(std::get<2>(__v))>,
std::decay_t<decltype(__out[__idx])>>::__type;
if (std::get<1>(__v))
__out[std::get<0>(__v) - 1] = static_cast<_ConvertedTupleType>(std::get<2>(__v));
__assign(static_cast<_ConvertedTupleType>(std::get<2>(__v), __out[std::get<0>(__v) - 1]);
}
Assign __assign;
};

// Write functor that routes each scanned element to one of two outputs.
//
// __v is read with std::get: element 1 is the flag choosing the destination,
// element 2 is the value to store, and element 0 is a running count used to
// compute the destination index (presumably the inclusive count of flagged
// elements up to __idx, as in the tuple built by __gen_expand_count_mask - confirm).
// Each entry of __out is itself accessed with std::get<0> / std::get<1>, i.e. it
// exposes the two destinations side by side.
template <typename Assign = oneapi::dpl::__internal::__pstl_assign>
struct __write_to_idx_if_else
{
    template <typename _OutRng, typename _SizeType, typename ValueType>
    void
    operator()(_OutRng&& __out, _SizeType __idx, const ValueType& __v) const
    {
        const auto& __count = std::get<0>(__v);
        const auto& __value = std::get<2>(__v);

        if (!std::get<1>(__v))
        {
            // Flag not set: store into the second destination, at the input
            // position minus the running count.
            __assign(__value, std::get<1>(__out[__idx - __count]));
            return;
        }

        // Flag set: store into the first destination; the count is 1-based.
        __assign(__value, std::get<0>(__out[__count - 1]));
    }
    Assign __assign;
};

template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryOperation, typename _InitType,
Expand Down Expand Up @@ -975,51 +1016,29 @@ struct __invoke_single_group_copy_if
}
};

template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _CreateMaskOp,
typename _CopyByMaskOp>
template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _GenMask,
typename _WriteOp>
auto
__parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op,
_CopyByMaskOp __copy_by_mask_op)
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _GenMask __generate_mask, _WriteOp __write_op)
{
using _ReduceOp = ::std::plus<_Size>;
using _Assigner = unseq_backend::__scan_assigner;
using _NoAssign = unseq_backend::__scan_no_assign;
using _MaskAssigner = unseq_backend::__mask_assigner<1>;
using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>;
using _InitType = unseq_backend::__no_init_value<_Size>;

_Assigner __assign_op;
_ReduceOp __reduce_op;
_DataAcc __get_data_op;
_MaskAssigner __add_mask_op;

// temporary buffer to store boolean mask
oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n);

return __parallel_transform_scan_base(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::make_zip_view(
::std::forward<_InRng>(__in_rng),
oneapi::dpl::__ranges::all_view<int32_t, __par_backend_hetero::access_mode::read_write>(
__mask_buf.get_buffer())),
::std::forward<_OutRng>(__out_rng), __reduce_op, _InitType{},
// local scan
unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
_MaskAssigner, _CreateMaskOp, _InitType>{__reduce_op, __get_data_op, __assign_op,
__add_mask_op, __create_mask_op},
// scan between groups
unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
_Assigner, _DataAcc, _InitType>{__reduce_op, __get_data_op, _NoAssign{}, __assign_op,
__get_data_op},
// global scan
__copy_by_mask_op);

return __parallel_transform_reduce_then_scan(
__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
std::forward<_OutRng>(__out_rng), oneapi::dpl::__par_backend_hetero::__gen_count_mask<_GenMask>{__generate_mask},
_ReduceOp{}, oneapi::dpl::__par_backend_hetero::__gen_expand_count_mask<_GenMask>{__generate_mask},
oneapi::dpl::__par_backend_hetero::__get_zeroth_element{},
__write_op,
oneapi::dpl::unseq_backend::__no_init_value<_Size>{},
/*_Inclusive=*/std::true_type{});
}

template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _Pred>
template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _Pred,
typename _Assign = oneapi::dpl::__internal::__pstl_assign>
auto
__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred)
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred, _Assign __assign = _Assign{})
{
using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>;

Expand Down Expand Up @@ -1049,13 +1068,16 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,
else
{
using _ReduceOp = ::std::plus<_Size>;
using _GenMask = oneapi::dpl::__par_backend_hetero::__gen_mask<_Pred>;
_GenMask __generate_mask{__pred};

return __parallel_transform_reduce_then_scan(
__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
std::forward<_OutRng>(__out_rng), oneapi::dpl::__par_backend_hetero::__gen_count_pred<_Pred>{__pred},
_ReduceOp{}, oneapi::dpl::__par_backend_hetero::__gen_expand_count_pred<_Pred>{__pred},
std::forward<_OutRng>(__out_rng),
oneapi::dpl::__par_backend_hetero::__gen_count_mask<_GenMask>{__generate_mask}, _ReduceOp{},
oneapi::dpl::__par_backend_hetero::__gen_expand_count_mask<_GenMask>{__generate_mask},
oneapi::dpl::__par_backend_hetero::__get_zeroth_element{},
oneapi::dpl::__par_backend_hetero::__write_to_idx_if{},
oneapi::dpl::__par_backend_hetero::__write_to_idx_if<_Assign>{__assign},
oneapi::dpl::unseq_backend::__no_init_value<_Size>{},
/*_Inclusive=*/std::true_type{});
}
Expand Down
Loading

0 comments on commit 7ad26d7

Please sign in to comment.