Use drop_operation_state to avoid stack overflows (#1004)
aurianer authored Dec 12, 2023
1 parent 72b5210 commit 3c70c1d
Showing 7 changed files with 27 additions and 9 deletions.
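Background: pika's drop_operation_state is a sender adaptor (available in the pika 0.19 series, hence the dependency bumps in this commit) that destroys the upstream operation state as soon as its value has been received. Senders chained incrementally, as in the band-to-tridiagonal sweeps below, otherwise keep every predecessor's operation state alive until the final sender is released, and that release tears the whole chain down in one deep destructor recursion. A minimal sketch of the pattern follows; it is not code from this commit and assumes pika >= 0.19.1:

// Minimal sketch, assuming pika >= 0.19.1; not code from this commit.
#include <utility>

#include <pika/execution.hpp>

namespace ex = pika::execution::experimental;
namespace tt = pika::this_thread::experimental;

int main() {
  // Chains like this are built incrementally, one sweep at a time, in the
  // band-to-tridiagonal solver. Each adaptor's operation state owns its
  // predecessor's, so releasing the final sender used to unwind the whole
  // chain in one destructor recursion; deep chains overflow the stack.
  ex::unique_any_sender<int> chain{ex::just(0)};
  for (int i = 0; i < 1000; ++i) {
    chain = std::move(chain) | ex::then([](int j) { return j + 1; }) |
            // Release the upstream operation state as soon as its value has
            // been delivered, so states are freed incrementally instead of
            // all at once when the final sender goes away.
            ex::drop_operation_state();
  }
  tt::sync_wait(std::move(chain));
  return 0;
}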
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -169,7 +169,7 @@ else()
 endif()
 
 # ----- pika
-find_package(pika 0.18.0 REQUIRED)
+find_package(pika 0.19.1 REQUIRED)
 
 # ----- BLASPP/LAPACKPP
 find_package(blaspp REQUIRED)
9 changes: 9 additions & 0 deletions ci/cpu/gcc11_debug_stdexec.yml
@@ -18,3 +18,12 @@ cpu gcc11 stdexec debug build:
     - cpu gcc11 stdexec debug deps
   variables:
     DEPLOY_IMAGE: $CSCS_REGISTRY_PATH/cpu-gcc11-stdexec-debug/deploy:$CI_COMMIT_SHA
+
+cpu gcc11 stdexec debug test:
+  extends: .run_common
+  needs:
+    - cpu gcc11 stdexec debug build
+  trigger:
+    include:
+      - artifact: pipeline.yml
+        job: cpu gcc11 stdexec debug build
2 changes: 1 addition & 1 deletion ci/docker/debug-cpu-stdexec.yaml
@@ -31,5 +31,5 @@ spack:
       - 'malloc=system'
     stdexec:
       require:
-        - '@git.48c52df0f81c6151eecf4f39fa5eed2dc0216204=main'
+        - '@git.nvhpc-23.09.rc4=main'
         - 'build_type=Debug'
4 changes: 2 additions & 2 deletions include/dlaf/eigensolver/band_to_tridiag/mc.h
@@ -824,7 +824,7 @@ TridiagResult<T, Device::CPU> BandToTridiag<Backend::MC, D, T>::call_L(
           ex::when_all_vector(matrix::select(mat_v, common::iterate_range2d(LocalTileIndex{i, i},
                                                                             LocalTileSize{n - i, 1}))) |
           ex::then([](TileVector&& vector) { return std::make_shared<TileVector>(std::move(vector)); }) |
-          ex::split();
+          ex::drop_operation_state() | ex::split();
     }
 
     ex::when_all(std::move(sem_sender), ex::just(sem_next, sweep), w_pipeline(), tiles_v) |
@@ -1339,7 +1339,7 @@ TridiagResult<T, Device::CPU> BandToTridiag<Backend::MC, D, T>::call_L(
       if (sweep % b == 0) {
         tile_v = panel_v.readwrite(LocalTileIndex{id_block_local, 0}) |
                  ex::then([](Tile&& tile) { return std::make_shared<Tile>(std::move(tile)); }) |
-                 ex::split();
+                 ex::drop_operation_state() | ex::split();
       }
 
       ex::unique_any_sender<SemaphorePtr> sem_sender;
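In both hunks above, drop_operation_state is inserted immediately before split. Since split caches the produced value (here a shared_ptr) and hands it to every consumer, dropping the upstream operation state first means the cached split state no longer keeps the whole producing chain alive across sweeps. A hypothetical standalone version of that ordering, with TileVector reduced to a toy type:

// Hypothetical illustration, not repository code: drop the upstream
// operation state before split() caches the shared result.
#include <memory>
#include <utility>
#include <vector>

#include <pika/execution.hpp>

namespace ex = pika::execution::experimental;

using TileVector = std::vector<int>;  // toy stand-in for the tile vector type

auto shared_tiles(ex::unique_any_sender<TileVector> tiles) {
  return std::move(tiles) |
         ex::then([](TileVector&& v) { return std::make_shared<TileVector>(std::move(v)); }) |
         // Without this, the split state below would own the whole upstream
         // chain for as long as any consumer of the shared sender exists.
         ex::drop_operation_state() | ex::split();
}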
13 changes: 9 additions & 4 deletions include/dlaf/sender/transform.h
@@ -51,6 +51,7 @@ template <TransformDispatchType Tag = TransformDispatchType::Plain, Backend B =
           typename F = void, typename Sender = void,
           typename = std::enable_if_t<pika::execution::experimental::is_sender_v<Sender>>>
 [[nodiscard]] decltype(auto) transform(const Policy<B> policy, F&& f, Sender&& sender) {
+  using pika::execution::experimental::drop_operation_state;
   using pika::execution::experimental::then;
   using pika::execution::experimental::transfer;
 
@@ -61,7 +62,8 @@ template <TransformDispatchType Tag = TransformDispatchType::Plain, Backend B =
   using dlaf::common::internal::Unwrapping;
 
   if constexpr (B == Backend::MC) {
-    return then(std::move(transfer_sender), ConsumeRvalues{Unwrapping{std::forward<F>(f)}});
+    return then(std::move(transfer_sender), ConsumeRvalues{Unwrapping{std::forward<F>(f)}}) |
+           drop_operation_state();
   }
   else if constexpr (B == Backend::GPU) {
 #if defined(DLAF_WITH_GPU)
@@ -71,15 +73,18 @@ template <TransformDispatchType Tag = TransformDispatchType::Plain, Backend B =
 
     if constexpr (Tag == TransformDispatchType::Plain) {
       return then_with_stream(std::move(transfer_sender),
-                              ConsumeRvalues{Unwrapping{std::forward<F>(f)}});
+                              ConsumeRvalues{Unwrapping{std::forward<F>(f)}}) |
+             drop_operation_state();
     }
     else if constexpr (Tag == TransformDispatchType::Blas) {
       return then_with_cublas(std::move(transfer_sender), ConsumeRvalues{Unwrapping{std::forward<F>(f)}},
-                              CUBLAS_POINTER_MODE_HOST);
+                              CUBLAS_POINTER_MODE_HOST) |
+             drop_operation_state();
     }
     else if constexpr (Tag == TransformDispatchType::Lapack) {
       return then_with_cusolver(std::move(transfer_sender),
-                                ConsumeRvalues{Unwrapping{std::forward<F>(f)}});
+                                ConsumeRvalues{Unwrapping{std::forward<F>(f)}}) |
+             drop_operation_state();
     }
     else {
       DLAF_STATIC_FAIL(
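Because every branch of transform now appends drop_operation_state(), all DLA-Future algorithms built on top of it pick up the fix without call-site changes. A hedged sketch of the underlying idea as a generic wrapper (the name with_dropped_state is illustrative, not part of the repository):

// Illustrative wrapper: append drop_operation_state to whatever a
// continuation-style adaptor returns, mirroring what transform now does
// for the MC, CUDA-stream, cuBLAS, and cuSOLVER branches.
#include <utility>

#include <pika/execution.hpp>

namespace ex = pika::execution::experimental;

template <typename Sender, typename F>
[[nodiscard]] auto with_dropped_state(Sender&& sender, F&& f) {
  return ex::then(std::forward<Sender>(sender), std::forward<F>(f)) |
         ex::drop_operation_state();
}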
5 changes: 4 additions & 1 deletion include/dlaf/sender/transform_mpi.h
@@ -11,6 +11,8 @@
 
 #include <type_traits>
 
+#include <pika/execution.hpp>
+
 #include <dlaf/common/consume_rvalues.h>
 #include <dlaf/common/pipeline.h>
 #include <dlaf/common/unwrap.h>
@@ -91,7 +93,8 @@ template <typename F, typename Sender,
   return ex::transfer(std::forward<Sender>(sender),
                       ex::with_priority(dlaf::internal::getMPIScheduler(),
                                         pika::execution::thread_priority::boost)) |
-         ex::then(dlaf::common::internal::ConsumeRvalues{MPICallHelper{std::forward<F>(f)}});
+         ex::then(dlaf::common::internal::ConsumeRvalues{MPICallHelper{std::forward<F>(f)}}) |
+         ex::drop_operation_state();
 }
 
 /// Fire-and-forget transformMPI. This submits the work and returns void.
1 change: 1 addition & 0 deletions spack/packages/dla-future/package.py
@@ -66,6 +66,7 @@ class DlaFuture(CMakePackage, CudaPackage, ROCmPackage):
     depends_on("pika@0.16:", when="@0.2.0")
     depends_on("pika@0.17:", when="@0.2.1")
     depends_on("pika@0.18:", when="@0.3.0:")
+    depends_on("pika@0.19.1:", when="@master")
     depends_on("pika-algorithms@0.1:", when="@:0.2")
     depends_on("pika +mpi")
     depends_on("pika +cuda", when="+cuda")
