Skip to content

Commit

Permalink
[PROTOTYPE] partition, unique families and ranges API (#1708)
Browse files Browse the repository at this point in the history
Enables the partition and unique families of scan-like algorithms, including their ranges API.
Makes the legacy scan use `__result_and_scratch_storage` so that its return type matches the future type and the code compiles.
Refactors the `__pattern` / `__parallel` structure of the scan-like algorithms for consistency.

---------

Signed-off-by: Dan Hoeflinger <dan.hoeflinger@intel.com>
  • Loading branch information
danhoeflinger committed Aug 7, 2024
1 parent 9d45339 commit 5d0a96c
Show file tree
Hide file tree
Showing 8 changed files with 489 additions and 254 deletions.
80 changes: 35 additions & 45 deletions include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -885,33 +885,6 @@ __pattern_mismatch(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterat
// copy_if
//------------------------------------------------------------------------

// Common iterator-based driver for the scan-style copy patterns.
//
// Wraps the input [__first, __last) in a read-mode range and the output
// [__output_first, __output_first + n) in a write-mode range (n being the input length), then passes
// both views, n, and the two functors to __par_backend_hetero::__parallel_scan_copy.
//
// Returns a pair of:
//   first  - __output_first advanced by the full input length n (not by the count in `second`);
//   second - the value produced by get() on the backend result
//            (presumably the number of elements copied - confirm against __parallel_scan_copy).
// An empty input returns {__output_first, 0} before any range is created or the backend is called.
template <typename _BackendTag, typename _ExecutionPolicy, typename _Iterator1, typename _IteratorOrTuple,
          typename _CreateMaskOp, typename _CopyByMaskOp>
::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type>
__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last,
                    _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op)
{
    using _DiffType = typename ::std::iterator_traits<_Iterator1>::difference_type;

    // Nothing to process: report zero without touching the backend.
    if (__first == __last)
        return ::std::make_pair(__output_first, _DiffType{0});

    _DiffType __count = __last - __first;

    // Input side: read-only range over the source sequence.
    auto __keep_input = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>();
    auto __input_buf = __keep_input(__first, __last);

    // Output side: write-only range sized like the input, since up to __count elements may be produced.
    auto __keep_output =
        oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>();
    auto __output_buf = __keep_output(__output_first, __output_first + __count);

    auto __future = __par_backend_hetero::__parallel_scan_copy(
        _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __input_buf.all_view(), __output_buf.all_view(),
        __count, __create_mask_op, __copy_by_mask_op);

    // get() yields the backend's result value, which becomes the second member of the returned pair.
    ::std::size_t __copied = __future.get();
    return ::std::make_pair(__output_first + __count, __copied);
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _Iterator1, typename _Iterator2,
typename _Predicate>
_Iterator2
Expand Down Expand Up @@ -951,19 +924,24 @@ __pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e
return ::std::make_pair(__result1, __result2);

using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type;
using _ReduceOp = ::std::plus<_It1DifferenceType>;

unseq_backend::__create_mask<_UnaryPredicate, _It1DifferenceType> __create_mask_op{__pred};
unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}};
_It1DifferenceType __n = __last - __first;

auto __result = __pattern_scan_copy(
__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last,
__par_backend_hetero::zip(
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1),
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)),
__create_mask_op, __copy_by_mask_op);
auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>();
auto __buf1 = __keep1(__first, __last);

auto __zipped_res = __par_backend_hetero::zip(
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1),
__par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2));

return ::std::make_pair(__result1 + __result.second, __result2 + (__last - __first - __result.second));
auto __keep2 =
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, decltype(__zipped_res)>();
auto __buf2 = __keep2(__zipped_res, __zipped_res + __n);

auto __result = oneapi::dpl::__par_backend_hetero::__parallel_partition_copy(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __pred);

return std::make_pair(__result1 + __result.get(), __result2 + (__last - __first - __result.get()));
}

//------------------------------------------------------------------------
Expand All @@ -977,16 +955,28 @@ __pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec
_Iterator2 __result_first, _BinaryPredicate __pred)
{
using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type;
unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign,
/*inclusive*/ ::std::true_type, 1>
__copy_by_mask_op;
__create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{
__not_pred<_BinaryPredicate>{__pred}};

auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last,
__result_first, __create_mask_op, __copy_by_mask_op);
_It1DifferenceType __n = __last - __first;

if (__n == 0)
return __result_first;
if (__n == 1)
{
oneapi::dpl::__internal::__pattern_walk2_brick(
__hetero_tag<_BackendTag>{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first,
oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{});
return __result_first + 1;
}

auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>();
auto __buf1 = __keep1(__first, __last);
auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>();
auto __buf2 = __keep2(__result_first, __result_first + __n);

auto __result = oneapi::dpl::__par_backend_hetero::__parallel_unique_copy(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __pred);

return __result_first + __result.second;
return __result_first + __result.get();
}

template <typename _Name>
Expand Down
92 changes: 28 additions & 64 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,67 +334,21 @@ __pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& _
// copy_if
//------------------------------------------------------------------------

// Range-based driver for the scan-style copy patterns.
//
// Allocates a temporary int32_t mask buffer sized to __rng1, zips it with __rng1, and runs
// __par_backend_hetero::__parallel_transform_scan_base with three functors:
//   1. a local scan built around the caller's __create_mask_op,
//   2. a scan between groups,
//   3. the caller's __copy_by_mask_op as the global-scan step, with __rng2 as the output range.
//
// Returns the value obtained from get() on the backend result
// (presumably the number of elements written to __rng2 - confirm against __parallel_transform_scan_base).
// An empty __rng1 returns its size (zero) without allocating the mask buffer or calling the backend.
template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _CreateMaskOp,
typename _CopyByMaskOp>
oneapi::dpl::__internal::__difference_t<_Range1>
__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
_CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op)
{
// Early exit for an empty input range.
if (__rng1.size() == 0)
return __rng1.size();

// Building blocks for the scan functors below; the reduction is addition over the range's size type,
// with no initial value.
using _SizeType = decltype(__rng1.size());
using _ReduceOp = ::std::plus<_SizeType>;
using _Assigner = unseq_backend::__scan_assigner;
using _NoAssign = unseq_backend::__scan_no_assign;
using _MaskAssigner = unseq_backend::__mask_assigner<1>;
using _InitType = unseq_backend::__no_init_value<_SizeType>;
using _DataAcc = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>;

_Assigner __assign_op;
_ReduceOp __reduce_op;
_DataAcc __get_data_op;
_MaskAssigner __add_mask_op;

// Temporary mask storage with __rng1.size() int32_t elements. It is constructed from __exec here,
// before __exec is forwarded into the backend call below.
oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __rng1.size());

// The scan input is __rng1 zipped with a read_write view of the mask buffer; .get() on the backend
// result produces the value returned to the caller.
auto __res =
__par_backend_hetero::__parallel_transform_scan_base(
_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::zip_view(
__rng1, oneapi::dpl::__ranges::all_view<int32_t, __par_backend_hetero::access_mode::read_write>(
__mask_buf.get_buffer())),
__rng2, __reduce_op, _InitType{},
// local scan
unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _Assigner,
_MaskAssigner, _CreateMaskOp, _InitType>{__reduce_op, __get_data_op, __assign_op,
__add_mask_op, __create_mask_op},
// scan between groups
unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _ReduceOp, _DataAcc, _NoAssign,
_Assigner, _DataAcc, _InitType>{__reduce_op, __get_data_op, _NoAssign{}, __assign_op,
__get_data_op},
// global scan
__copy_by_mask_op)
.get();

return __res;
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Predicate,
typename _Assign = oneapi::dpl::__internal::__pstl_assign>
oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
_Predicate __pred, _Assign)
_Predicate __pred, _Assign&& __assign)
{
using _SizeType = decltype(__rng1.size());
using _ReduceOp = ::std::plus<_SizeType>;
auto __n = __rng1.size();
if (__n == 0)
return 0;

unseq_backend::__create_mask<_Predicate, _SizeType> __create_mask_op{__pred};
unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ ::std::true_type, 1> __copy_by_mask_op;
auto __res = oneapi::dpl::__par_backend_hetero::__parallel_copy_if(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng1),
std::forward<_Range2>(__rng2), __n, __pred, std::forward<_Assign>(__assign));

return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec),
::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2),
__create_mask_op, __copy_by_mask_op);
return __res.get(); //is a blocking call
}

//------------------------------------------------------------------------
Expand Down Expand Up @@ -433,17 +387,27 @@ template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typ
typename _BinaryPredicate, typename _Assign = oneapi::dpl::__internal::__pstl_assign>
oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result,
_BinaryPredicate __pred, _Assign)
_BinaryPredicate __pred, _Assign&& __assign)
{
using _It1DifferenceType = oneapi::dpl::__internal::__difference_t<_Range1>;
unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, _Assign, /*inclusive*/ ::std::true_type, 1>
__copy_by_mask_op;
__create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{
__not_pred<_BinaryPredicate>{__pred}};

return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec),
::std::forward<_Range1>(__rng), ::std::forward<_Range2>(__result),
__create_mask_op, __copy_by_mask_op);
auto __n = __rng.size();
if (__n == 0)
return 0;
if (__n == 1)
{
using CopyBrick = oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>;
oneapi::dpl::__par_backend_hetero::__parallel_for(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec),
unseq_backend::walk_n<_ExecutionPolicy, CopyBrick>{CopyBrick{}}, __n, std::forward<_Range1>(__rng),
std::forward<_Range2>(__result))
.get();

return 1;
}

return oneapi::dpl::__par_backend_hetero::__parallel_unique_copy(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
std::forward<_Range2>(__result), __pred, std::forward<_Assign>(__assign))
.get();
}

//------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 5d0a96c

Please sign in to comment.