Skip to content

Commit

Permalink
Revert "remove unique pattern family from reduce_then_scan"
Browse files Browse the repository at this point in the history
This reverts commit 4d78ec3.
  • Loading branch information
danhoeflinger committed Aug 6, 2024
1 parent eafdcc0 commit 6a7bae0
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 118 deletions.
55 changes: 20 additions & 35 deletions include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -885,33 +885,6 @@ __pattern_mismatch(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterat
// copy_if
//------------------------------------------------------------------------

// Generic scan-based copy driver for the hetero backend (iterator interface).
//
// Wraps [__first, __last) as a read range and [__output_first, __output_first + n) as a write range,
// then delegates to __par_backend_hetero::__parallel_scan_copy with the supplied mask-creation and
// copy-by-mask functors.
//
// Returns a pair of:
//   - for empty input: (__output_first, 0);
//   - otherwise: (__output_first + n, the count obtained from the backend result),
// where n is the input length.
template <typename _BackendTag, typename _ExecutionPolicy, typename _Iterator1, typename _IteratorOrTuple,
          typename _CreateMaskOp, typename _CopyByMaskOp>
::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type>
__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last,
                    _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op)
{
    using _Distance = typename ::std::iterator_traits<_Iterator1>::difference_type;

    // Nothing to process: no backend call is made for an empty input.
    if (__first == __last)
        return ::std::make_pair(__output_first, _Distance{0});

    _Distance __len = __last - __first;

    // Source range, opened for reading.
    auto __keep_src = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>();
    auto __src_buf = __keep_src(__first, __last);

    // Destination range, opened for writing; sized like the input.
    auto __keep_dst =
        oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>();
    auto __dst_buf = __keep_dst(__output_first, __output_first + __len);

    auto __pending = __par_backend_hetero::__parallel_scan_copy(
        _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __src_buf.all_view(), __dst_buf.all_view(), __len,
        __create_mask_op, __copy_by_mask_op);

    // get() yields the count reported by the backend.
    ::std::size_t __copied = __pending.get();
    return ::std::make_pair(__output_first + __len, __copied);
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _Iterator1, typename _Iterator2,
typename _Predicate>
_Iterator2
Expand Down Expand Up @@ -982,16 +955,28 @@ __pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec
_Iterator2 __result_first, _BinaryPredicate __pred)
{
using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type;
unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign,
/*inclusive*/ ::std::true_type, 1>
__copy_by_mask_op;
__create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{
__not_pred<_BinaryPredicate>{__pred}};

auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last,
__result_first, __create_mask_op, __copy_by_mask_op);
_It1DifferenceType __n = __last - __first;

if (__n == 0)
return __result_first;
if (__n == 1)
{
oneapi::dpl::__internal::__pattern_walk2_brick(
__hetero_tag<_BackendTag>{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first,
oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{});
return __result_first + 1;
}

auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>();
auto __buf1 = __keep1(__first, __last);
auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>();
auto __buf2 = __keep2(__result_first, __result_first + __n);

auto __result = oneapi::dpl::__par_backend_hetero::__parallel_unique_copy(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __pred);

return __result_first + __result.second;
return __result_first + __result.get();
}

template <typename _Name>
Expand Down
93 changes: 34 additions & 59 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,52 +334,6 @@ __pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& _
// copy_if
//------------------------------------------------------------------------

// Generic scan-based copy driver for the hetero backend (range interface).
//
// Allocates a temporary int32_t mask buffer with one entry per input element, zips it with __rng1,
// and runs __parallel_transform_scan_base with three stages:
//   1. a local scan that also creates the mask via __create_mask_op,
//   2. a scan between groups,
//   3. a global stage performed by __copy_by_mask_op.
//
// Returns __rng1.size() (i.e. zero) for empty input, otherwise the value obtained from the backend
// result's get().
template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _CreateMaskOp,
          typename _CopyByMaskOp>
oneapi::dpl::__internal::__difference_t<_Range1>
__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
                    _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op)
{
    // Empty input: skip the backend call entirely.
    if (__rng1.size() == 0)
        return __rng1.size();

    using _Size = decltype(__rng1.size());
    using _Plus = ::std::plus<_Size>;
    using _ScanAssign = unseq_backend::__scan_assigner;
    using _ScanNoAssign = unseq_backend::__scan_no_assign;
    using _MaskAssign = unseq_backend::__mask_assigner<1>;
    using _NoInit = unseq_backend::__no_init_value<_Size>;
    using _WalkN = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>;

    // Stage functor types, spelled out once so the backend call below stays readable.
    using _LocalScan = unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _Plus, _WalkN,
                                             _ScanAssign, _MaskAssign, _CreateMaskOp, _NoInit>;
    using _GroupScan = unseq_backend::__scan</*inclusive*/ ::std::true_type, _ExecutionPolicy, _Plus, _WalkN,
                                             _ScanNoAssign, _ScanAssign, _WalkN, _NoInit>;
    using _MaskView = oneapi::dpl::__ranges::all_view<int32_t, __par_backend_hetero::access_mode::read_write>;

    _ScanAssign __assigner;
    _Plus __plus;
    _WalkN __walk;
    _MaskAssign __mask_assigner;

    // Temporary mask storage, one int32_t per input element; must be created before __exec is forwarded.
    oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_storage(__exec, __rng1.size());

    auto __count = __par_backend_hetero::__parallel_transform_scan_base(
                       _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
                       oneapi::dpl::__ranges::zip_view(__rng1, _MaskView(__mask_storage.get_buffer())), __rng2,
                       __plus, _NoInit{},
                       // local scan
                       _LocalScan{__plus, __walk, __assigner, __mask_assigner, __create_mask_op},
                       // scan between groups
                       _GroupScan{__plus, __walk, _ScanNoAssign{}, __assigner, __walk},
                       // global scan
                       __copy_by_mask_op)
                       .get();

    return __count;
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Predicate,
typename _Assign = oneapi::dpl::__internal::__pstl_assign>
oneapi::dpl::__internal::__difference_t<_Range2>
Expand Down Expand Up @@ -429,27 +383,45 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec,
// unique_copy
//------------------------------------------------------------------------

// Name-tag template used as the template argument of make_wrapped_policy<__copy_wrapper>(...)
// to wrap the execution policy for copy steps in this file. Only declared here; no definition
// is visible in this chunk.
// NOTE(review): presumably this gives the wrapped copy kernel a distinct name — confirm against
// make_wrapped_policy.
template <typename _Name>
struct __copy_wrapper;

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2,
typename _BinaryPredicate, typename _Assign = oneapi::dpl::__internal::__pstl_assign>
oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result,
_BinaryPredicate __pred, _Assign)
_BinaryPredicate __pred, _Assign&& __assign)
{
using _It1DifferenceType = oneapi::dpl::__internal::__difference_t<_Range1>;
unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, _Assign, /*inclusive*/ ::std::true_type, 1>
__copy_by_mask_op;
__create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{
__not_pred<_BinaryPredicate>{__pred}};

return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec),
::std::forward<_Range1>(__rng), ::std::forward<_Range2>(__result),
__create_mask_op, __copy_by_mask_op);
auto __n = __rng.size();
if (__n == 0)
return 0;
if (__n == 1)
{
using CopyBrick = oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>;
oneapi::dpl::__par_backend_hetero::__parallel_for(
_BackendTag{},
oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_wrapper>(
::std::forward<_ExecutionPolicy>(__exec)),
unseq_backend::walk_n<_ExecutionPolicy, CopyBrick>{CopyBrick{}}, __n, std::forward<_Range1>(__rng),
std::forward<_Range2>(__result))
.get();

return 1;
}

return oneapi::dpl::__par_backend_hetero::__parallel_unique_copy(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__rng),
std::forward<_Range2>(__result), __pred, std::forward<_Assign>(__assign))
.get();
}

//------------------------------------------------------------------------
// unique
//------------------------------------------------------------------------

// Name-tag template used as the template argument of make_wrapped_policy<__unique_wrapper>(...)
// when __pattern_unique invokes __pattern_unique_copy. Only declared here; no definition is
// visible in this chunk.
// NOTE(review): presumably this keeps the nested unique_copy kernel name distinct from the
// caller's — confirm against make_wrapped_policy.
template <typename _Name>
struct __unique_wrapper;

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range, typename _BinaryPredicate>
oneapi::dpl::__internal::__difference_t<_Range>
__pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred)
Expand All @@ -461,10 +433,13 @@ __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ra

oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __rng.size());
auto res_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer());
auto res = __ranges::__pattern_unique_copy(__tag, __exec, __rng, res_rng, __pred,
oneapi::dpl::__internal::__pstl_assign());
auto res = __ranges::__pattern_unique_copy(
__tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__unique_wrapper>(__exec), __rng, res_rng, __pred,
oneapi::dpl::__internal::__pstl_assign());

__ranges::__pattern_walk_n(__tag, ::std::forward<_ExecutionPolicy>(__exec),
__ranges::__pattern_walk_n(__tag,
oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_wrapper>(
::std::forward<_ExecutionPolicy>(__exec)),
__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, res_rng,
::std::forward<_Range>(__rng));
return res;
Expand Down
61 changes: 55 additions & 6 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,19 @@ struct __gen_mask
_Predicate __pred;
};

// Mask generator for the "unique" family of patterns.
//
// For a given index, reports whether that element should be kept: it is kept exactly when the
// binary predicate does NOT hold between the element and its predecessor. The predicate is
// invoked as __pred(current, previous).
template <typename _BinaryPredicate>
struct __gen_unique_mask
{
    // __in_rng: indexable input range; __idx: position to test.
    // The starting index is offset to 1 for "unique" patterns and the 0th element copy is handled
    // separately, so __idx - 1 is expected to be a valid index here.
    template <typename _InRng>
    bool
    operator()(_InRng&& __in_rng, std::size_t __idx) const
    {
        if (__pred(__in_rng[__idx], __in_rng[__idx - 1]))
            return false; // matches its predecessor: not a new unique element
        return true;
    }
    _BinaryPredicate __pred;
};

template <typename _GenMask>
struct __gen_count_mask
{
Expand Down Expand Up @@ -926,7 +939,8 @@ __parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backen
return __parallel_transform_reduce_then_scan(
__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_Range1>(__in_rng),
std::forward<_Range2>(__out_rng), __gen_transform, __binary_op, __gen_transform, _ScanInputTransform{},
_WriteOp{}, __init, _Inclusive{});
_WriteOp{}, __init, _Inclusive{},
/*_IsUniquePattern=*/std::false_type{});
}
}
{
Expand Down Expand Up @@ -996,11 +1010,11 @@ struct __invoke_single_group_copy_if
};

template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _GenMask,
typename _WriteOp>
typename _WriteOp, typename _IsUniquePattern>
auto
__parallel_reduce_then_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
_InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _GenMask __generate_mask,
_WriteOp __write_op)
_WriteOp __write_op, _IsUniquePattern __is_unique_pattern)
{
using _GenReduceInput = oneapi::dpl::__par_backend_hetero::__gen_count_mask<_GenMask>;
using _ReduceOp = std::plus<_Size>;
Expand All @@ -1011,7 +1025,7 @@ __parallel_reduce_then_scan_copy(oneapi::dpl::__internal::__device_backend_tag _
__backend_tag, std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
std::forward<_OutRng>(__out_rng), _GenReduceInput{__generate_mask}, _ReduceOp{}, _GenScanInput{__generate_mask},
_ScanInputTransform{}, __write_op, oneapi::dpl::unseq_backend::__no_init_value<_Size>{},
/*_Inclusive=*/std::true_type{});
/*_Inclusive=*/std::true_type{}, __is_unique_pattern);
}

template <typename _ExecutionPolicy, typename _InRng, typename _OutRng, typename _Size, typename _CreateMaskOp,
Expand Down Expand Up @@ -1055,6 +1069,40 @@ __parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag
__copy_by_mask_op);
}

// Backend entry point for unique_copy on the device backend.
//
// Chooses between two implementations based on __prefer_reduce_then_scan(__exec):
//   - reduce-then-scan: mask from __gen_unique_mask, output via __write_to_idx_if<1, _Assign>,
//     flagged as a "unique" pattern;
//   - otherwise: the __parallel_scan_copy path using __create_mask_unique_copy over the negated
//     predicate and __copy_by_mask.
//
// __rng is the input range, __result the output range, __pred the binary predicate and __assign
// the assignment functor (defaults to __pstl_assign). Returns whatever the selected backend
// routine returns.
template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinaryPredicate,
          typename _Assign = oneapi::dpl::__internal::__pstl_assign>
auto
__parallel_unique_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
                       _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred,
                       _Assign&& __assign = oneapi::dpl::__internal::__pstl_assign{})
{
    // Read the size up front, before any of the ranges are forwarded.
    auto __n = __rng.size();

    if (!oneapi::dpl::__par_backend_hetero::__prefer_reduce_then_scan(__exec))
    {
        // Scan-copy path: mask is created from the negated predicate, then copied by mask.
        using _SizeType = decltype(__n);
        using _Plus = std::plus<_SizeType>;
        using _NotPred = oneapi::dpl::__internal::__not_pred<_BinaryPredicate>;
        using _MaskCreator = oneapi::dpl::__internal::__create_mask_unique_copy<_NotPred, _SizeType>;
        using _MaskCopier = unseq_backend::__copy_by_mask<_Plus, _Assign, /*inclusive*/ std::true_type, 1>;

        return __parallel_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec),
                                    std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n,
                                    _MaskCreator{_NotPred{__pred}},
                                    _MaskCopier{_Plus{}, std::forward<_Assign>(__assign)});
    }

    // Reduce-then-scan path.
    using _UniqueMask = oneapi::dpl::__par_backend_hetero::__gen_unique_mask<_BinaryPredicate>;
    using _IdxWriter = oneapi::dpl::__par_backend_hetero::__write_to_idx_if<1, _Assign>;

    return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec),
                                            std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n,
                                            _UniqueMask{__pred}, _IdxWriter{std::forward<_Assign>(__assign)},
                                            /*_IsUniquePattern=*/std::true_type{});
}

template <typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _UnaryPredicate>
auto
__parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec,
Expand All @@ -1069,7 +1117,7 @@ __parallel_partition_copy(oneapi::dpl::__internal::__device_backend_tag __backen

return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec),
std::forward<_Range1>(__rng), std::forward<_Range2>(__result), __n,
_GenMask{__pred}, _WriteOp{});
_GenMask{__pred}, _WriteOp{}, /*_IsUniquePattern=*/std::false_type{});
}
else
{
Expand Down Expand Up @@ -1120,7 +1168,8 @@ __parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag,

return __parallel_reduce_then_scan_copy(__backend_tag, std::forward<_ExecutionPolicy>(__exec),
std::forward<_InRng>(__in_rng), std::forward<_OutRng>(__out_rng), __n,
_GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)});
_GenMask{__pred}, _WriteOp{std::forward<_Assign>(__assign)},
/*Unique=*/std::false_type{});
}
else
{
Expand Down
Loading

0 comments on commit 6a7bae0

Please sign in to comment.