Merge 44ff7d5 into c61370d

pika-org · Aug 4, 2022 · 6185de8 · 6185de8
2 parents c61370d + 44ff7d5
commit 6185de8
Show file tree

Hide file tree

Showing 10 changed files with 298 additions and 346 deletions.
diff --git a/libs/pika/algorithms/tests/unit/container_algorithms/foreach_tests.hpp b/libs/pika/algorithms/tests/unit/container_algorithms/foreach_tests.hpp
@@ -348,8 +348,8 @@ void test_for_each_sender(ExPolicy&& p, IteratorTag)
     auto rng = pika::util::make_iterator_range(
         iterator(std::begin(c)), iterator(std::end(c)));
     auto f = [](std::size_t& v) { v = 42; };
-    auto result = ex::just(rng, f) |
-        pika::ranges::for_each(std::forward<ExPolicy>(p)) | tt::sync_wait();
+    auto result = tt::sync_wait(
+        ex::just(rng, f) | pika::ranges::for_each(std::forward<ExPolicy>(p)));
     PIKA_TEST(result == iterator(std::end(c)));
 
     // verify values
@@ -380,8 +380,8 @@ void test_for_each_exception_sender(ExPolicy p, IteratorTag)
     bool caught_exception = false;
     try
     {
-        ex::just(rng, f) | pika::ranges::for_each(std::forward<ExPolicy>(p)) |
-            tt::sync_wait();
+        tt::sync_wait(ex::just(rng, f) |
+            pika::ranges::for_each(std::forward<ExPolicy>(p)));
 
         PIKA_TEST(false);
     }
@@ -417,8 +417,8 @@ void test_for_each_bad_alloc_sender(ExPolicy p, IteratorTag)
     bool caught_exception = false;
     try
     {
-        ex::just(rng, f) | pika::ranges::for_each(std::forward<ExPolicy>(p)) |
-            tt::sync_wait();
+        tt::sync_wait(ex::just(rng, f) |
+            pika::ranges::for_each(std::forward<ExPolicy>(p)));
 
         PIKA_TEST(false);
     }

diff --git a/libs/pika/async_cuda/tests/performance/synchronize.cu b/libs/pika/async_cuda/tests/performance/synchronize.cu
@@ -90,7 +90,8 @@ int pika_main(pika::program_options::variables_map& vm)
         pika::chrono::detail::high_resolution_timer timer;
         for (std::size_t i = 0; i != iterations; ++i)
         {
-            ex::schedule(sched) | cu::then_with_stream(f) | tt::sync_wait();
+            tt::sync_wait(
+                ex::schedule(sched) | cu::then_with_stream(f));
         }
         double elapsed = timer.elapsed();
         std::cout
@@ -111,17 +112,17 @@ int pika_main(pika::program_options::variables_map& vm)
             // We have to manually unroll this loop, because the type of the
             // sender changes for each additional then_with_stream call. The
             // number of unrolled calls must match batch_size above.
-            ex::schedule(sched) | cu::then_with_stream(f) |
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
-                cu::then_with_stream(f) | tt::sync_wait();
+                cu::then_with_stream(f));
         }
         // Do the remainder one-by-one
         for (std::size_t i = 0; i < non_batch_iterations; ++i)
         {
-            ex::schedule(sched) | cu::then_with_stream(f) | tt::sync_wait();
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f));
         }
         double elapsed = timer.elapsed();
         std::cout
@@ -145,7 +146,7 @@ int pika_main(pika::program_options::variables_map& vm)
             // intentionally insert dummy then([]{}) calls between the
             // then_with_stream calls to force synchronization between the
             // kernel launches.
-            ex::schedule(sched) | cu::then_with_stream(f) |
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f) |
                 ex::transfer(ex::thread_pool_scheduler{}) | ex::then([] {}) |
                 ex::transfer(sched) | cu::then_with_stream(f) |
                 ex::transfer(ex::thread_pool_scheduler{}) | ex::then([] {}) |
@@ -163,12 +164,12 @@ int pika_main(pika::program_options::variables_map& vm)
                 ex::transfer(ex::thread_pool_scheduler{}) | ex::then([] {}) |
                 ex::transfer(sched) | cu::then_with_stream(f) |
                 ex::transfer(ex::thread_pool_scheduler{}) | ex::then([] {}) |
-                ex::transfer(sched) | cu::then_with_stream(f) | tt::sync_wait();
+                ex::transfer(sched) | cu::then_with_stream(f));
         }
         // Do the remainder one-by-one
         for (std::size_t i = 0; i < non_batch_iterations; ++i)
         {
-            ex::schedule(sched) | cu::then_with_stream(f) | tt::sync_wait();
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f));
         }
         double elapsed = timer.elapsed();
         std::cout
@@ -186,8 +187,8 @@ int pika_main(pika::program_options::variables_map& vm)
         pika::chrono::detail::high_resolution_timer timer;
         for (std::size_t i = 0; i != iterations; ++i)
         {
-            ex::schedule(sched) | cu::then_with_stream(f) |
-                ex::transfer(ex::thread_pool_scheduler{}) | tt::sync_wait();
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f) |
+                ex::transfer(ex::thread_pool_scheduler{}));
         }
         double elapsed = timer.elapsed();
         std::cout
@@ -208,19 +209,19 @@ int pika_main(pika::program_options::variables_map& vm)
             // We have to manually unroll this loop, because the type of the
             // sender changes for each additional then_with_stream call. The
             // number of unrolled calls must match batch_size above.
-            ex::schedule(sched) | cu::then_with_stream(f) |
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) | cu::then_with_stream(f) |
                 cu::then_with_stream(f) |
-                ex::transfer(ex::thread_pool_scheduler{}) | tt::sync_wait();
+                ex::transfer(ex::thread_pool_scheduler{}));
         }
         // Do the remainder one-by-one
         for (std::size_t i = 0; i < non_batch_iterations; ++i)
         {
-            ex::schedule(sched) | cu::then_with_stream(f) |
-                ex::transfer(ex::thread_pool_scheduler{}) | tt::sync_wait();
+            tt::sync_wait(ex::schedule(sched) | cu::then_with_stream(f) |
+                ex::transfer(ex::thread_pool_scheduler{}));
         }
         double elapsed = timer.elapsed();
         std::cout

diff --git a/libs/pika/async_cuda/tests/unit/then_with_stream.cu b/libs/pika/async_cuda/tests/unit/then_with_stream.cu
@@ -375,7 +375,7 @@ int pika_main()
     {
         cudaStream_t first_stream{};
         cudaStream_t second_stream{};
-        ex::schedule(cu::cuda_scheduler{pool}) |
+        tt::sync_wait(ex::schedule(cu::cuda_scheduler{pool}) |
             cu::then_with_stream(
                 [&](cudaStream_t stream) { first_stream = stream; }) |
             cu::then_with_stream([&](cudaStream_t stream) {
@@ -394,8 +394,7 @@ int pika_main()
             }) |
             cu::then_with_stream([&](cudaStream_t stream) {
                 PIKA_TEST_EQ(stream, second_stream);
-            }) |
-            tt::sync_wait();
+            }));
     }
 
     {
@@ -414,14 +413,15 @@ int pika_main()
             cu::then_with_stream(increment{}) |
             cu::then_with_stream(increment{}) |
             cu::then_with_stream(increment{});
-        ex::when_all(ex::just(&p_h), std::move(s), ex::just(sizeof(type)),
-            ex::just(cudaMemcpyDeviceToHost)) |
+        tt::sync_wait(
+            ex::when_all(ex::just(&p_h), std::move(s), ex::just(sizeof(type)),
+                ex::just(cudaMemcpyDeviceToHost)) |
             ex::transfer(cu::cuda_scheduler{pool}) |
             cu::then_with_stream(cuda_memcpy_async{}) |
             ex::transfer(ex::thread_pool_scheduler{}) |
             ex::then(&cu::check_cuda_error) |
             ex::then([&p_h] { PIKA_TEST_EQ(p_h, 3); }) |
-            ex::transfer(ex::thread_pool_scheduler{}) | tt::sync_wait();
+            ex::transfer(ex::thread_pool_scheduler{}));
 
         cu::check_cuda_error(cudaFree(p));
     }

diff --git a/libs/pika/async_mpi/tests/unit/algorithm_transform_mpi.cpp b/libs/pika/async_mpi/tests/unit/algorithm_transform_mpi.cpp
@@ -138,8 +138,9 @@ int pika_main()
                 {
                     data = 42;
                 }
-                auto result = ex::just(&data, count, datatype, 0, comm) |
-                    mpi::transform_mpi(MPI_Ibcast) | tt::sync_wait();
+                auto result =
+                    tt::sync_wait(ex::just(&data, count, datatype, 0, comm) |
+                        mpi::transform_mpi(MPI_Ibcast));
                 if (rank != 0)
                 {
                     PIKA_TEST_EQ(data, 42);
@@ -156,10 +157,9 @@ int pika_main()
                 bool exception_thrown = false;
                 try
                 {
-                    mpi::transform_mpi(
+                    tt::sync_wait(mpi::transform_mpi(
                         error_sender<int*, int, MPI_Datatype, int, MPI_Comm>{},
-                        MPI_Ibcast) |
-                        tt::sync_wait();
+                        MPI_Ibcast));
                     PIKA_TEST(false);
                 }
                 catch (std::runtime_error const& e)
@@ -203,9 +203,8 @@ int pika_main()
                 bool exception_thrown = false;
                 try
                 {
-                    mpi::transform_mpi(
-                        ex::just(data, count, datatype, -1, comm), MPI_Ibcast) |
-                        tt::sync_wait();
+                    tt::sync_wait(mpi::transform_mpi(
+                        ex::just(data, count, datatype, -1, comm), MPI_Ibcast));
                     PIKA_TEST(false);
                 }
                 catch (std::runtime_error const& e)
@@ -228,9 +227,8 @@ int pika_main()
                 bool exception_thrown = false;
                 try
                 {
-                    mpi::transform_mpi(
-                        ex::just(data, count, datatype, -1, comm), MPI_Ibcast) |
-                        tt::sync_wait();
+                    tt::sync_wait(mpi::transform_mpi(
+                        ex::just(data, count, datatype, -1, comm), MPI_Ibcast));
                     PIKA_TEST(false);
                 }
                 catch (std::runtime_error const&)

diff --git a/libs/pika/execution/include/pika/execution/algorithms/start_detached.hpp b/libs/pika/execution/include/pika/execution/algorithms/start_detached.hpp
@@ -16,7 +16,6 @@
 #include <pika/allocator_support/traits/is_allocator.hpp>
 #include <pika/assert.hpp>
 #include <pika/concepts/concepts.hpp>
-#include <pika/execution/algorithms/detail/partial_algorithm.hpp>
 #include <pika/execution_base/operation_state.hpp>
 #include <pika/execution_base/sender.hpp>
 #include <pika/functional/detail/tag_fallback_invoke.hpp>
@@ -154,19 +153,6 @@ namespace pika { namespace execution { namespace experimental {
                 operation_state_type{PIKA_FORWARD(Sender, sender), alloc};
             PIKA_UNUSED(p.release());
         }
-
-        // clang-format off
-        template <typename Allocator = pika::detail::internal_allocator<>,
-            PIKA_CONCEPT_REQUIRES_(
-                pika::detail::is_allocator_v<Allocator>
-            )>
-        // clang-format on
-        friend constexpr PIKA_FORCEINLINE auto tag_fallback_invoke(
-            start_detached_t, Allocator const& allocator = Allocator{})
-        {
-            return detail::partial_algorithm<start_detached_t, Allocator>{
-                allocator};
-        }
     } start_detached{};
 }}}    // namespace pika::execution::experimental
 #endif
diff --git a/libs/pika/execution/include/pika/execution/algorithms/sync_wait.hpp b/libs/pika/execution/include/pika/execution/algorithms/sync_wait.hpp
@@ -14,7 +14,6 @@
 
 #include <pika/concepts/concepts.hpp>
 #include <pika/datastructures/variant.hpp>
-#include <pika/execution/algorithms/detail/partial_algorithm.hpp>
 #include <pika/execution/algorithms/detail/single_result.hpp>
 #include <pika/execution_base/operation_state.hpp>
 #include <pika/execution_base/receiver.hpp>
@@ -268,11 +267,5 @@ namespace pika::this_thread::experimental {
             state.wait();
             return state.get_value();
         }
-
-        friend constexpr PIKA_FORCEINLINE auto tag_fallback_invoke(sync_wait_t)
-        {
-            return pika::execution::experimental::detail::partial_algorithm<
-                sync_wait_t>{};
-        }
     } sync_wait{};
 }    // namespace pika::this_thread::experimental
diff --git a/libs/pika/execution/tests/unit/algorithm_sync_wait.cpp b/libs/pika/execution/tests/unit/algorithm_sync_wait.cpp
@@ -72,16 +72,15 @@ int pika_main()
         std::atomic<bool> start_called{false};
         std::atomic<bool> connect_called{false};
         std::atomic<bool> tag_invoke_overload_called{false};
-        custom_sender{
-            start_called, connect_called, tag_invoke_overload_called} |
-            tt::sync_wait();
+        tt::sync_wait(custom_sender{
+            start_called, connect_called, tag_invoke_overload_called});
         PIKA_TEST(start_called);
         PIKA_TEST(connect_called);
         PIKA_TEST(!tag_invoke_overload_called);
     }
 
     {
-        PIKA_TEST_EQ(ex::just(3) | tt::sync_wait(), 3);
+        PIKA_TEST_EQ(tt::sync_wait(ex::just(3)), 3);
     }
 
     // tag_invoke overload