Skip to content

Commit

Permalink
Implement execution::unseq. Resolves microsoftGH-44.
Browse files Browse the repository at this point in the history
<execution>
* Add unsequenced_policy and unseq.
* Mark unsequenced_policy as being an execution policy.
* Add detection for this new policy to std::for_each and std::for_each_n, and use #pragma loop(ivdep) when supplied. We are not marking other algorithms because all other algorithms have something that makes the operative loop body not actually independent and the docs for #pragma loop(ivdep) suggest that is not allowed.
* Remove #pragma loop(ivdep) from std::transform because transform is callable such that _Dest == _First1 or _Dest == _First2.

<yvals_core.h>
* Mark proposal as implemented and change __cpp_lib_execution when C++20 is turned on.

instantiate_algorithms.hpp:
* Add unseq to execution policy matrices.

P0024R2_parallel_algorithms_for_each:
* Add testing for unseq.

VSO_0157762_feature_test_macros:
* Update test for new value of __cpp_lib_execution.
  • Loading branch information
BillyONeal committed Jul 30, 2020
1 parent 35ce1cf commit 4265c10
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 53 deletions.
83 changes: 47 additions & 36 deletions stl/inc/execution
Original file line number Diff line number Diff line change
Expand Up @@ -75,42 +75,71 @@ constexpr size_t _Still_active = static_cast<size_t>(-1);

// EXECUTION POLICIES
namespace execution {
class sequenced_policy { // request for sequential execution with termination
class sequenced_policy {
// indicates support for only sequential execution, and request termination on exceptions
public:
using _Standard_execution_policy = int;
static constexpr bool _Parallelize = false;
static constexpr bool _Ivdep = false;
};

inline constexpr sequenced_policy seq{/* unspecified */};

class parallel_policy { // request for parallel execution with termination
class parallel_policy {
// indicates support by element access functions for execution with parallel forward progress guarantees and
// requests termination on exceptions
public:
using _Standard_execution_policy = int;
static constexpr bool _Parallelize = true;
static constexpr bool _Ivdep = true;
};

inline constexpr parallel_policy par{/* unspecified */};

class parallel_unsequenced_policy {
// request for parallel execution without thread identity with termination
// indicates support by element access functions for parallel execution with weakly parallel forward progress
// guarantees, and requests termination on exceptions
//
// (at this time, equivalent to parallel_policy)
public:
using _Standard_execution_policy = int;
static constexpr bool _Parallelize = true;
static constexpr bool _Ivdep = true;
};

inline constexpr parallel_unsequenced_policy par_unseq{/* unspecified */};

#if _HAS_CXX20
class unsequenced_policy {
    // Policy type of execution::unseq: element access functions may run with weakly parallel forward progress
    // guarantees and may be interleaved on a single thread; termination is requested on exceptions.
    //
    // (currently treated like sequenced_policy everywhere except the for_each family)
public:
    static constexpr bool _Ivdep       = true; // loop bodies may be treated as independent
    static constexpr bool _Parallelize = false; // never dispatched to multiple threads
    using _Standard_execution_policy   = int;
};

inline constexpr unsequenced_policy unseq{/* unspecified */};
#endif // _HAS_CXX20

} // namespace execution

// All of the above are execution policies:
template <>
struct is_execution_policy<execution::sequenced_policy> : true_type {}; // sequenced_policy is an execution policy
struct is_execution_policy<execution::sequenced_policy> : true_type {};

template <>
struct is_execution_policy<execution::parallel_policy> : true_type {}; // parallel_policy is an execution policy
struct is_execution_policy<execution::parallel_policy> : true_type {};

template <>
struct is_execution_policy<execution::parallel_unsequenced_policy> : true_type {
}; // parallel_unsequenced_policy is an execution policy
struct is_execution_policy<execution::parallel_unsequenced_policy> : true_type {};

#if _HAS_CXX20
template <>
struct is_execution_policy<execution::unsequenced_policy> : true_type {};
#endif // _HAS_CXX20

// STRUCT _Parallelism_resources_exhausted
struct _Parallelism_resources_exhausted : exception {
Expand Down Expand Up @@ -1216,6 +1245,8 @@ void for_each(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Fn _Func) noexcept /* termi
}
}

_For_each_ivdep(_UFirst, _ULast, _Pass_fn(_Func));
} else if constexpr (remove_reference_t<_ExPo>::_Ivdep) {
// Non-parallel policies whose _Ivdep flag is set (execution::unseq) take the ivdep loop;
// testing _Parallelize here could never select this branch for unseq.
_For_each_ivdep(_UFirst, _ULast, _Pass_fn(_Func));
} else {
for (; _UFirst != _ULast; ++_UFirst) {
Expand Down Expand Up @@ -1258,6 +1289,8 @@ _FwdIt for_each_n(_ExPo&&, _FwdIt _First, const _Diff _Count_raw, _Fn _Func) noe
_CATCH_END
}

_Seek_wrapped(_First, _For_each_n_ivdep(_UFirst, _Count, _Pass_fn(_Func)));
} else if constexpr (remove_reference_t<_ExPo>::_Ivdep) {
_Seek_wrapped(_First, _For_each_n_ivdep(_UFirst, _Count, _Pass_fn(_Func)));
} else {
for (; 0 < _Count; --_Count, (void) ++_UFirst) {
Expand Down Expand Up @@ -2281,17 +2314,6 @@ _NODISCARD _FwdIt search_n(_ExPo&&, const _FwdIt _First, _FwdIt _Last, const _Di
}

// PARALLEL FUNCTION TEMPLATE transform
template <class _FwdIt1, class _FwdIt2, class _Fn>
_FwdIt2 _Transform_ivdep(_FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _Dest, _Fn _Func) {
// unary op transform with independent loop bodies
#pragma loop(ivdep)
for (; _First != _Last; ++_First, (void) ++_Dest) {
*_Dest = _Func(*_First);
}

return _Dest;
}

template <class _FwdIt1, class _FwdIt2, class _Fn>
struct _Static_partitioned_unary_transform2 {
using _Diff = _Common_diff_t<_FwdIt1, _FwdIt2>;
Expand All @@ -2311,7 +2333,7 @@ struct _Static_partitioned_unary_transform2 {
const auto _Key = _Team._Get_next_key();
if (_Key) {
const auto _Source = _Source_basis._Get_chunk(_Key);
_Transform_ivdep(_Source._First, _Source._Last, _Dest_basis._Get_chunk(_Key)._First, _Func);
_STD transform(_Source._First, _Source._Last, _Dest_basis._Get_chunk(_Key)._First, _Func);
return _Cancellation_status::_Running;
}

Expand Down Expand Up @@ -2349,12 +2371,12 @@ _FwdIt2 transform(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _D
_CATCH_END
}

_Seek_wrapped(_Dest, _Transform_ivdep(_UFirst, _ULast, _UDest, _Pass_fn(_Func)));
_Seek_wrapped(_Dest, _STD transform(_UFirst, _ULast, _UDest, _Pass_fn(_Func)));
return _Dest;
} else {
_Seek_wrapped(
_Dest, _Transform_ivdep(_UFirst, _ULast,
_Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _Pass_fn(_Func)));
_Dest, _STD transform(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)),
_Pass_fn(_Func)));
return _Dest;
}
} else {
Expand All @@ -2364,17 +2386,6 @@ _FwdIt2 transform(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _D
}
}

template <class _FwdIt1, class _FwdIt2, class _FwdIt3, class _Fn>
_FwdIt3 _Transform_ivdep(_FwdIt1 _First1, const _FwdIt1 _Last1, _FwdIt2 _First2, _FwdIt3 _Dest, _Fn _Func) {
// binary op transform with independent loop bodies
#pragma loop(ivdep)
for (; _First1 != _Last1; ++_First1, (void) ++_First2, ++_Dest) {
*_Dest = _Func(*_First1, *_First2);
}

return _Dest;
}

template <class _FwdIt1, class _FwdIt2, class _FwdIt3, class _Fn>
struct _Static_partitioned_binary_transform2 {
using _Diff = _Common_diff_t<_FwdIt1, _FwdIt2, _FwdIt3>;
Expand All @@ -2396,7 +2407,7 @@ struct _Static_partitioned_binary_transform2 {
const auto _Key = _Team._Get_next_key();
if (_Key) {
const auto _Source1 = _Source1_basis._Get_chunk(_Key);
_Transform_ivdep(_Source1._First, _Source1._Last, _Source2_basis._Get_chunk(_Key)._First,
_STD transform(_Source1._First, _Source1._Last, _Source2_basis._Get_chunk(_Key)._First,
_Dest_basis._Get_chunk(_Key)._First, _Func);
return _Cancellation_status::_Running;
}
Expand Down Expand Up @@ -2442,11 +2453,11 @@ _FwdIt3 transform(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, const _F
_CATCH_END
}

_Seek_wrapped(_Dest, _Transform_ivdep(_UFirst1, _ULast1, _UFirst2, _UDest, _Pass_fn(_Func)));
_Seek_wrapped(_Dest, _STD transform(_UFirst1, _ULast1, _UFirst2, _UDest, _Pass_fn(_Func)));
return _Dest;
} else {
const auto _Count = _Idl_distance<_FwdIt1>(_UFirst1, _ULast1);
_Seek_wrapped(_Dest, _Transform_ivdep(_UFirst1, _ULast1, _Get_unwrapped_n(_First2, _Count),
_Seek_wrapped(_Dest, _STD transform(_UFirst1, _ULast1, _Get_unwrapped_n(_First2, _Count),
_Get_unwrapped_n(_Dest, _Count), _Pass_fn(_Func)));
return _Dest;
}
Expand Down
18 changes: 14 additions & 4 deletions stl/inc/yvals_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
// (partially implemented, missing noop coroutines)
// P0919R3 Heterogeneous Lookup For Unordered Containers
// P0966R1 string::reserve() Should Not Shrink
// P1001R2 execution::unseq
// P1006R1 constexpr For pointer_traits<T*>::pointer_to()
// P1023R0 constexpr For std::array Comparisons
// P1024R3 Enhancing span Usability
Expand Down Expand Up @@ -225,6 +226,10 @@
// C++ allows an implementation to implement parallel algorithms as calls to the serial algorithms.
// This implementation parallelizes several common algorithm calls, but not all.
//
// std::execution::unseq has no direct analogue for any optimizer we target as of 2020-07-29,
// though we will map it to #pragma loop(ivdep) for the for_each algorithms only as these are the only algorithms where
// the library does not need to introduce inter-loop-body dependencies to accomplish the algorithm's goals.
//
// The following algorithms are parallelized.
// * adjacent_difference
// * adjacent_find
Expand Down Expand Up @@ -1091,10 +1096,7 @@
#if _HAS_STD_BYTE
#define __cpp_lib_byte 201603L
#endif // _HAS_STD_BYTE
#define __cpp_lib_clamp 201603L
#ifndef _M_CEE
#define __cpp_lib_execution 201603L
#endif // _M_CEE
#define __cpp_lib_clamp 201603L
#define __cpp_lib_filesystem 201703L
#define __cpp_lib_gcd_lcm 201606L
#define __cpp_lib_hardware_interference_size 201703L
Expand Down Expand Up @@ -1184,6 +1186,14 @@
#define __cpp_lib_unwrap_ref 201811L
#endif // _HAS_CXX20

// __cpp_lib_execution is left undefined when _M_CEE is defined.
// _HAS_CXX20 must be tested before _HAS_CXX17: C++20 mode also satisfies the C++17 check,
// so testing C++17 first would make the 201902L value unreachable.
#ifndef _M_CEE
#if _HAS_CXX20
#define __cpp_lib_execution 201902L // P1001R2 execution::unseq
#elif _HAS_CXX17 // ^^^ _HAS_CXX20 / _HAS_CXX17 vvv
#define __cpp_lib_execution 201603L // P0024R2 Parallel Algorithms
#endif // language mode
#endif // _M_CEE

#if _HAS_CXX20
#define __cpp_lib_array_constexpr 201811L // P1032R1 Miscellaneous constexpr
#elif _HAS_CXX17 // ^^^ _HAS_CXX20 / _HAS_CXX17 vvv
Expand Down
18 changes: 18 additions & 0 deletions tests/std/include/instantiate_algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ namespace std_testing {
test_exec_fwd1_fwd2(std::execution::seq, fwd1, fwd2);
test_exec_fwd1_fwd2(std::execution::par, fwd1, fwd2);
test_exec_fwd1_fwd2(std::execution::par_unseq, fwd1, fwd2);
#if _HAS_CXX20
test_exec_fwd1_fwd2(std::execution::unseq, fwd1, fwd2);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

(void) std::find_end(fwd1, fwd1, fwd2, fwd2);
Expand Down Expand Up @@ -503,6 +506,9 @@ namespace std_testing {
test_exec_fwd1(std::execution::seq, fwd1);
test_exec_fwd1(std::execution::par, fwd1);
test_exec_fwd1(std::execution::par_unseq, fwd1);
#if _HAS_CXX20
test_exec_fwd1(std::execution::unseq, fwd1);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

test_fwd1_fwd2(fwd1, FWDIT);
Expand Down Expand Up @@ -592,6 +598,9 @@ namespace std_testing {
test_exec_bid1_bid2_xxx_backward(std::execution::seq, bid1, bid2);
test_exec_bid1_bid2_xxx_backward(std::execution::par, bid1, bid2);
test_exec_bid1_bid2_xxx_backward(std::execution::par_unseq, bid1, bid2);
#if _HAS_CXX20
test_exec_bid1_bid2_xxx_backward(std::execution::unseq, bid1, bid2);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

std::copy_backward(bid1, bid1, bid2);
Expand All @@ -615,6 +624,9 @@ namespace std_testing {
test_exec_bid1_fwd1(std::execution::seq, bid1, fwd1);
test_exec_bid1_fwd1(std::execution::par, bid1, fwd1);
test_exec_bid1_fwd1(std::execution::par_unseq, bid1, fwd1);
#if _HAS_CXX20
test_exec_bid1_fwd1(std::execution::unseq, bid1, fwd1);
#endif // _HAS_CXX20
}

template <typename Bid1, typename ExecutionPolicy>
Expand Down Expand Up @@ -653,6 +665,9 @@ namespace std_testing {
test_exec_bid1(std::execution::seq, bid1);
test_exec_bid1(std::execution::par, bid1);
test_exec_bid1(std::execution::par_unseq, bid1);
#if _HAS_CXX20
test_exec_bid1(std::execution::unseq, bid1);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

std::reverse(bid1, bid1);
Expand Down Expand Up @@ -700,6 +715,9 @@ namespace std_testing {
test_exec_ran(std::execution::seq, ran);
test_exec_ran(std::execution::par, ran);
test_exec_ran(std::execution::par_unseq, ran);
#if _HAS_CXX20
test_exec_ran(std::execution::unseq, ran);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

#if _HAS_AUTO_PTR_ETC
Expand Down
36 changes: 24 additions & 12 deletions tests/std/tests/P0024R2_parallel_algorithms_for_each/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,39 @@ const auto call_only_once = [](atomic<bool>& b) { assert(!b.exchange(true)); };

const auto atomic_identity = [](atomic<bool>& b) { return b.load(); };

template <template <class...> class Container>
void test_case_for_each_parallel(const size_t testSize) {
template <typename ExecutionPolicy, template <class...> class Container>
void test_case_for_each_parallel(const size_t testSize, const ExecutionPolicy& exec) {
Container<atomic<bool>> c(testSize);
for_each(par, c.begin(), c.end(), call_only_once);
for_each(exec, c.begin(), c.end(), call_only_once);
assert(all_of(c.begin(), c.end(), atomic_identity));
}

template <template <class...> class Container>
void test_case_for_each_n_parallel(const size_t testSize) {
template <typename ExecutionPolicy, template <class...> class Container>
void test_case_for_each_n_parallel(const size_t testSize, const ExecutionPolicy& exec) {
Container<atomic<bool>> c(testSize);
auto result = for_each_n(par, c.begin(), testSize, call_only_once);
auto result = for_each_n(exec, c.begin(), testSize, call_only_once);
assert(result == c.end());
assert(all_of(c.begin(), c.end(), atomic_identity));
}

int main() {
test_case_for_each_n();
parallel_test_case(test_case_for_each_parallel<forward_list>);
parallel_test_case(test_case_for_each_parallel<list>);
parallel_test_case(test_case_for_each_parallel<vector>);
parallel_test_case(test_case_for_each_n_parallel<forward_list>);
parallel_test_case(test_case_for_each_n_parallel<list>);
parallel_test_case(test_case_for_each_n_parallel<vector>);
parallel_test_case(test_case_for_each_parallel<std::execution::parallel_policy, forward_list>, std::execution::par);
parallel_test_case(test_case_for_each_parallel<std::execution::parallel_policy, list>, std::execution::par);
parallel_test_case(test_case_for_each_parallel<std::execution::parallel_policy, vector>, std::execution::par);
parallel_test_case(
test_case_for_each_n_parallel<std::execution::parallel_policy, forward_list>, std::execution::par);
parallel_test_case(test_case_for_each_n_parallel<std::execution::parallel_policy, list>, std::execution::par);
parallel_test_case(test_case_for_each_n_parallel<std::execution::parallel_policy, vector>, std::execution::par);
#if _HAS_CXX20
parallel_test_case(
test_case_for_each_parallel<std::execution::unsequenced_policy, forward_list>, std::execution::unseq);
parallel_test_case(test_case_for_each_parallel<std::execution::unsequenced_policy, list>, std::execution::unseq);
parallel_test_case(test_case_for_each_parallel<std::execution::unsequenced_policy, vector>, std::execution::unseq);
parallel_test_case(
test_case_for_each_n_parallel<std::execution::unsequenced_policy, forward_list>, std::execution::unseq);
parallel_test_case(test_case_for_each_n_parallel<std::execution::unsequenced_policy, list>, std::execution::unseq);
parallel_test_case(
test_case_for_each_n_parallel<std::execution::unsequenced_policy, vector>, std::execution::unseq);
#endif // _HAS_CXX20
}
10 changes: 9 additions & 1 deletion tests/std/tests/VSO_0157762_feature_test_macros/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,15 @@ STATIC_ASSERT(__cpp_lib_erase_if == 202002L);
STATIC_ASSERT(__cpp_lib_exchange_function == 201304L);
#endif

#if _HAS_CXX17 && !defined(_M_CEE)
#if _HAS_CXX20 && !defined(_M_CEE)
#ifndef __cpp_lib_execution
#error __cpp_lib_execution is not defined
#elif __cpp_lib_execution != 201902L
#error __cpp_lib_execution is not 201902L
#else
STATIC_ASSERT(__cpp_lib_execution == 201902L);
#endif
#elif _HAS_CXX17 && !defined(_M_CEE)
#ifndef __cpp_lib_execution
#error __cpp_lib_execution is not defined
#elif __cpp_lib_execution != 201603L
Expand Down

0 comments on commit 4265c10

Please sign in to comment.