From a4f9fae5cb0341eb058ed46f28cb56c33863dbc3 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 19 Mar 2024 17:04:25 +0100 Subject: [PATCH 01/33] Add miniapps as tests --- cmake/DLAF_AddTest.cmake | 93 +++++++++++++++++++++++++--------------- miniapp/CMakeLists.txt | 36 +++++++++++----- 2 files changed, 84 insertions(+), 45 deletions(-) diff --git a/cmake/DLAF_AddTest.cmake b/cmake/DLAF_AddTest.cmake index 5b8bd2ab58..9a67ff0650 100644 --- a/cmake/DLAF_AddTest.cmake +++ b/cmake/DLAF_AddTest.cmake @@ -57,72 +57,68 @@ function(_set_element_to_fallback_value LIST_NAME ELEMENT_REGEX FALLBACK) endif() endfunction() -function(DLAF_addTest test_target_name) +function(DLAF_addTargetTest test_target_name) set(options "") set(oneValueArgs MPIRANKS USE_MAIN) - set(multiValueArgs SOURCES COMPILE_DEFINITIONS INCLUDE_DIRS LIBRARIES ARGUMENTS) - cmake_parse_arguments(DLAF_AT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(multiValueArgs ARGUMENTS) + cmake_parse_arguments(DLAF_ATT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) ### Checks - if(DLAF_AT_UNPARSED_ARGUMENTS) - message(FATAL_ERROR "Unknown arguments ${DLAF_AT_UNPARSED_ARGUMENTS}") - endif() - - if(NOT DLAF_AT_SOURCES) - message(FATAL_ERROR "No sources specified for this test") + if(DLAF_ATT_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown arguments ${DLAF_ATT_UNPARSED_ARGUMENTS}") endif() set(IS_AN_MPI_TEST FALSE) set(IS_AN_PIKA_TEST FALSE) - if(NOT DLAF_AT_USE_MAIN) + if(NOT DLAF_ATT_USE_MAIN) set(_gtest_tgt gtest) - elseif(DLAF_AT_USE_MAIN STREQUAL PLAIN) + elseif(DLAF_ATT_USE_MAIN STREQUAL PLAIN) set(_gtest_tgt gtest_main) - elseif(DLAF_AT_USE_MAIN STREQUAL PIKA) + elseif(DLAF_ATT_USE_MAIN STREQUAL PIKA) set(_gtest_tgt DLAF_gtest_pika_main) set(IS_AN_PIKA_TEST TRUE) - elseif(DLAF_AT_USE_MAIN STREQUAL MPI) + elseif(DLAF_ATT_USE_MAIN STREQUAL MPI) set(_gtest_tgt DLAF_gtest_mpi_main) set(IS_AN_MPI_TEST TRUE) - elseif(DLAF_AT_USE_MAIN STREQUAL MPIPIKA) + elseif(DLAF_ATT_USE_MAIN 
STREQUAL MPIPIKA) set(_gtest_tgt DLAF_gtest_mpipika_main) set(IS_AN_MPI_TEST TRUE) set(IS_AN_PIKA_TEST TRUE) - elseif(DLAF_AT_USE_MAIN STREQUAL CAPI) + elseif(DLAF_ATT_USE_MAIN STREQUAL CAPI) set(_gtest_tgt DLAF_gtest_mpi_main) set(IS_AN_MPI_TEST TRUE) else() - message(FATAL_ERROR "USE_MAIN=${DLAF_AT_USE_MAIN} is not a supported option") + message(FATAL_ERROR "USE_MAIN=${DLAF_ATT_USE_MAIN} is not a supported option") endif() if(IS_AN_MPI_TEST) - if(NOT DLAF_AT_MPIRANKS) + if(NOT DLAF_ATT_MPIRANKS) message(FATAL_ERROR "You are asking for an MPI external main without specifying MPIRANKS") endif() - if(NOT DLAF_AT_MPIRANKS GREATER 0) - message(FATAL_ERROR "Wrong MPIRANKS number ${DLAF_AT_MPIRANKS}") + if(NOT DLAF_ATT_MPIRANKS GREATER 0) + message(FATAL_ERROR "Wrong MPIRANKS number ${DLAF_ATT_MPIRANKS}") endif() - if(DLAF_AT_MPIRANKS GREATER MPIEXEC_MAX_NUMPROCS) + if(DLAF_ATT_MPIRANKS GREATER MPIEXEC_MAX_NUMPROCS) message( WARNING "\ - YOU ARE ASKING FOR ${DLAF_AT_MPIRANKS} RANKS, BUT THERE ARE JUST ${MPIEXEC_MAX_NUMPROCS} CORES. + YOU ARE ASKING FOR ${DLAF_ATT_MPIRANKS} RANKS, BUT THERE ARE JUST ${MPIEXEC_MAX_NUMPROCS} CORES. You can adjust MPIEXEC_MAX_NUMPROCS value to suppress this warning. Using OpenMPI may require to set the environment variable OMPI_MCA_rmaps_base_oversubscribe=1." 
) endif() else() - if(DLAF_AT_MPIRANKS) + if(DLAF_ATT_MPIRANKS) message(FATAL_ERROR "You specified MPIRANKS and asked for an external main without MPI") else() - set(DLAF_AT_MPIRANKS 1) + set(DLAF_ATT_MPIRANKS 1) endif() endif() ### Test target set(DLAF_TEST_RUNALL_WITH_MPIEXEC OFF CACHE BOOL "Run all tests using the workload manager.") - set(_TEST_ARGUMENTS ${DLAF_AT_ARGUMENTS}) + set(_TEST_ARGUMENTS ${DLAF_ATT_ARGUMENTS}) if(DLAF_TEST_RUNALL_WITH_MPIEXEC OR IS_AN_MPI_TEST) if(MPIEXEC_NUMCORE_FLAG) @@ -132,7 +128,7 @@ function(DLAF_addTest test_target_name) set(_CORES_PER_RANK 1) endif() - math(EXPR DLAF_CORE_PER_RANK "${_CORES_PER_RANK}/${DLAF_AT_MPIRANKS}") + math(EXPR DLAF_CORE_PER_RANK "${_CORES_PER_RANK}/${DLAF_ATT_MPIRANKS}") if(NOT DLAF_CORE_PER_RANK) set(DLAF_CORE_PER_RANK 1) @@ -148,12 +144,12 @@ function(DLAF_addTest test_target_name) else() separate_arguments(MPIEXEC_PREFLAGS) set(_TEST_COMMAND - ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${DLAF_AT_MPIRANKS} ${_MPI_CORE_ARGS} + ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${DLAF_ATT_MPIRANKS} ${_MPI_CORE_ARGS} ${MPIEXEC_PREFLAGS} ${DLAF_TEST_PREFLAGS} $ ${DLAF_TEST_POSTFLAGS} ${MPIEXEC_POSTFLAGS} ) endif() - set(_TEST_LABEL "RANK_${DLAF_AT_MPIRANKS}") + set(_TEST_LABEL "RANK_${DLAF_ATT_MPIRANKS}") else() # ----- Classic test @@ -165,12 +161,12 @@ function(DLAF_addTest test_target_name) separate_arguments(_PIKA_EXTRA_ARGS_LIST UNIX_COMMAND ${DLAF_PIKATEST_EXTRA_ARGS}) # --pika:bind=none is useful just in case more ranks are going to be allocated on the same node. 
- if(IS_AN_MPI_TEST AND (DLAF_AT_MPIRANKS GREATER 1) AND (NOT DLAF_TEST_THREAD_BINDING_ENABLED)) + if(IS_AN_MPI_TEST AND (DLAF_ATT_MPIRANKS GREATER 1) AND (NOT DLAF_TEST_THREAD_BINDING_ENABLED)) _set_element_to_fallback_value(_PIKA_EXTRA_ARGS_LIST "--pika:bind" "--pika:bind=none") endif() if(IS_AN_MPI_TEST AND DLAF_MPI_PRESET STREQUAL "plain-mpi") - math(EXPR _DLAF_PIKA_THREADS "${MPIEXEC_MAX_NUMPROCS}/${DLAF_AT_MPIRANKS}") + math(EXPR _DLAF_PIKA_THREADS "${MPIEXEC_MAX_NUMPROCS}/${DLAF_ATT_MPIRANKS}") if(_DLAF_PIKA_THREADS LESS 2) set(_DLAF_PIKA_THREADS 2) @@ -186,16 +182,16 @@ function(DLAF_addTest test_target_name) # Special treatment for C API tests # C API tests require pika arguments to be hard-coded in the test file - if(DLAF_AT_USE_MAIN STREQUAL CAPI) + if(DLAF_ATT_USE_MAIN STREQUAL CAPI) separate_arguments(_PIKA_EXTRA_ARGS_LIST_CAPI UNIX_COMMAND ${DLAF_PIKATEST_EXTRA_ARGS}) # --pika:bind=none is useful just in case more ranks are going to be allocated on the same node. - if((DLAF_AT_MPIRANKS GREATER 1) AND (NOT DLAF_TEST_THREAD_BINDING_ENABLED)) + if((DLAF_ATT_MPIRANKS GREATER 1) AND (NOT DLAF_TEST_THREAD_BINDING_ENABLED)) _set_element_to_fallback_value(_PIKA_EXTRA_ARGS_LIST_CAPI "--pika:bind" "--pika:bind=none") endif() if(IS_AN_MPI_TEST AND DLAF_MPI_PRESET STREQUAL "plain-mpi") - math(EXPR _DLAF_PIKA_THREADS "${MPIEXEC_MAX_NUMPROCS}/${DLAF_AT_MPIRANKS}") + math(EXPR _DLAF_PIKA_THREADS "${MPIEXEC_MAX_NUMPROCS}/${DLAF_ATT_MPIRANKS}") if(_DLAF_PIKA_THREADS LESS 2) set(_DLAF_PIKA_THREADS 2) @@ -215,11 +211,34 @@ function(DLAF_addTest test_target_name) endif() + add_test(NAME ${test_target_name} COMMAND ${_TEST_COMMAND} ${_TEST_ARGUMENTS}) + set_tests_properties(${test_target_name} PROPERTIES LABELS "${_TEST_LABEL}") +endfunction() + +function(DLAF_addTest test_target_name) + set(options "") + set(oneValueArgs MPIRANKS USE_MAIN) + set(multiValueArgs SOURCES COMPILE_DEFINITIONS INCLUDE_DIRS LIBRARIES ARGUMENTS) + cmake_parse_arguments(DLAF_AT "${options}" 
"${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + ### Checks + if(DLAF_AT_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown arguments ${DLAF_AT_UNPARSED_ARGUMENTS}") + endif() + + if(NOT DLAF_AT_SOURCES) + message(FATAL_ERROR "No sources specified for this test") + endif() + ### Test executable target add_executable(${test_target_name} ${DLAF_AT_SOURCES}) target_link_libraries( ${test_target_name} PRIVATE ${_gtest_tgt} DLAF_test ${DLAF_AT_LIBRARIES} dlaf.prop_private ) + set(IS_AN_MPI_TEST FALSE) + if(DLAF_AT_USE_MAIN MATCHES MPI OR DLAF_AT_USE_MAIN STREQUAL CAPI) + set(IS_AN_MPI_TEST TRUE) + endif() target_compile_definitions( ${test_target_name} PRIVATE ${DLAF_AT_COMPILE_DEFINITIONS} $<$: NUM_MPI_RANKS=${DLAF_AT_MPIRANKS}> @@ -229,8 +248,6 @@ function(DLAF_addTest test_target_name) ) target_add_warnings(${test_target_name}) DLAF_addPrecompiledHeaders(${test_target_name}) - add_test(NAME ${test_target_name} COMMAND ${_TEST_COMMAND} ${_TEST_ARGUMENTS}) - set_tests_properties(${test_target_name} PROPERTIES LABELS "${_TEST_LABEL}") ### DEPLOY include(GNUInstallDirs) @@ -242,4 +259,12 @@ function(DLAF_addTest test_target_name) RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) endif() + + ### Test + DLAF_addTargetTest( + ${test_target_name} + MPIRANKS ${DLAF_AT_MPIRANKS} + USE_MAIN ${DLAF_AT_USE_MAIN} + ARGUMENTS ${DLAF_AT_ARGUMENTS} + ) endfunction() diff --git a/miniapp/CMakeLists.txt b/miniapp/CMakeLists.txt index 9052ed64ec..c84af071cc 100644 --- a/miniapp/CMakeLists.txt +++ b/miniapp/CMakeLists.txt @@ -24,27 +24,41 @@ target_link_libraries(DLAF_miniapp INTERFACE DLAF::DLAF pika::pika) target_include_directories(DLAF_miniapp INTERFACE $) DLAF_addMiniapp(miniapp_cholesky SOURCES miniapp_cholesky.cpp) - DLAF_addMiniapp(miniapp_gen_to_std SOURCES miniapp_gen_to_std.cpp) - DLAF_addMiniapp(miniapp_reduction_to_band SOURCES miniapp_reduction_to_band.cpp) - DLAF_addMiniapp(miniapp_band_to_tridiag SOURCES miniapp_band_to_tridiag.cpp) - DLAF_addMiniapp(miniapp_tridiag_solver 
SOURCES miniapp_tridiag_solver.cpp) - DLAF_addMiniapp(miniapp_bt_band_to_tridiag SOURCES miniapp_bt_band_to_tridiag.cpp) - DLAF_addMiniapp(miniapp_bt_reduction_to_band SOURCES miniapp_bt_reduction_to_band.cpp) - DLAF_addMiniapp(miniapp_triangular_solver SOURCES miniapp_triangular_solver.cpp) - DLAF_addMiniapp(miniapp_triangular_multiplication SOURCES miniapp_triangular_multiplication.cpp) - DLAF_addMiniapp(miniapp_eigensolver SOURCES miniapp_eigensolver.cpp) - DLAF_addMiniapp(miniapp_gen_eigensolver SOURCES miniapp_gen_eigensolver.cpp) - DLAF_addMiniapp(miniapp_communication SOURCES miniapp_communication.cpp) +if(DLAF_BUILD_TESTING) + set(miniapp_test_args + USE_MAIN + MPIPIKA + MPIRANKS + 6 + ARGUMENTS + --grid-rows=3 + --grid-cols=2 + --check=all + ) + DLAF_addTargetTest(miniapp_cholesky ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_gen_to_std ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_reduction_to_band ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_band_to_tridiag ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_tridiag_solver ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_bt_band_to_tridiag ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_bt_reduction_to_band ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_triangular_solver ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_triangular_multiplication ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_eigensolver ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_gen_eigensolver ${miniapp_test_args}) + DLAF_addTargetTest(miniapp_communication ${miniapp_test_args}) +endif() + add_subdirectory(kernel) From d24895e5220e48e46082133685a9f56f99025b0d Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 21 Mar 2024 10:25:35 +0100 Subject: [PATCH 02/33] TEMP --- .cmake-format.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.cmake-format.py b/.cmake-format.py index 58bd9c8834..8665261603 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -19,6 +19,15 @@ "MPIRANKS": '1', } 
}, + "DLAF_addTargetTest": { + "pargs": 1, + "flags": [], + "kwargs": { + "ARGUMENTS": '*', + "USE_MAIN": '1', + "MPIRANKS": '1', + } + }, "DLAF_addMiniapp": { "pargs": 1, "flags": [], From 0f5a5526b4c9f69c40c560d2d569e92fb9ecaecd Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 20 Mar 2024 10:00:08 +0100 Subject: [PATCH 03/33] Update TEST_BINARIES filter to ignore everything but full paths --- ci/docker/deploy.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/deploy.Dockerfile b/ci/docker/deploy.Dockerfile index a500dd918e..d4340ea195 100644 --- a/ci/docker/deploy.Dockerfile +++ b/ci/docker/deploy.Dockerfile @@ -36,7 +36,7 @@ RUN pushd ${SOURCE}/miniapp && \ # Prune and bundle binaries RUN mkdir ${BUILD}-tmp && cd ${BUILD} && \ - export TEST_BINARIES=`PATH=${SOURCE}/ci:$PATH ctest --show-only=json-v1 | jq '.tests | map(.command | .[] | select(contains("check-threads") | not)) | .[]' | tr -d \"` && \ + export TEST_BINARIES=`PATH=${SOURCE}/ci:$PATH ctest --show-only=json-v1 | jq --raw-output '.tests | map(.command | .[] | select(startswith("/"))) | .[]' | tr -d \"` && \ LIBASAN=$(find /usr/lib -name libclang_rt.asan-x86_64.so) && \ if [[ -n "${LIBASAN}" ]]; then export LD_LIBRARY_PATH=$(dirname ${LIBASAN}):${LD_LIBRARY_PATH}; fi && \ echo "Binary sizes:" && \ From f9a95c7abfe9dbb18803a65cd1f1a07385ca52b7 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 20 Mar 2024 13:14:47 +0100 Subject: [PATCH 04/33] Move DLAF_PIKATEST_EXTRA_ARGS variable to DLAF_AddTest.cmake --- cmake/DLAF_AddTest.cmake | 2 ++ test/CMakeLists.txt | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/DLAF_AddTest.cmake b/cmake/DLAF_AddTest.cmake index 9a67ff0650..a2a4f0c5f2 100644 --- a/cmake/DLAF_AddTest.cmake +++ b/cmake/DLAF_AddTest.cmake @@ -45,6 +45,8 @@ # include/ # ) +set(DLAF_PIKATEST_EXTRA_ARGS "" CACHE STRING "Extra arguments for tests with pika") + # Check if LIST_NAME contains at least an element that matches 
ELEMENT_REGEX. If not, add FALLBACK # to the list. function(_set_element_to_fallback_value LIST_NAME ELEMENT_REGEX FALLBACK) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6923764872..3a18394716 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,8 +21,6 @@ option( # significantly faster when threads are not pinned. option(DLAF_TEST_THREAD_BINDING_ENABLED "If OFF disables pika thread binding." ON) -set(DLAF_PIKATEST_EXTRA_ARGS "" CACHE STRING "Extra arguments for tests with pika") - # If DLAF_CI_RUNNER_USES_MPIRUN=on we don't want to use any preset, so we just go for the custom one # without setting any variable. if(DLAF_CI_RUNNER_USES_MPIRUN) From 972aa378b935b4571ebd6147e8d2ddf9a5094b41 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 20 Mar 2024 13:17:23 +0100 Subject: [PATCH 05/33] Move CMake test options to main CMakeLists.txt to make available for miniapps as well --- CMakeLists.txt | 22 ++++++++++++++++++++++ test/CMakeLists.txt | 19 ------------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5788a1359c..a626a64676 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,6 +185,28 @@ add_subdirectory(external) # --------------------------------------------------------------------------- add_subdirectory(src) +# --------------------------------------------------------------------------- +# Test options (apply to unit tests and miniapps as tests) +# --------------------------------------------------------------------------- +set(DLAF_PRESET_OPTIONS "plain-mpi" "slurm" "custom") +set(DLAF_MPI_PRESET "plain-mpi" CACHE STRING "Select a preset to use") +set_property(CACHE DLAF_MPI_PRESET PROPERTY STRINGS ${DLAF_PRESET_OPTIONS}) +option( + DLAF_CI_RUNNER_USES_MPIRUN + "Remove mpiexec command for tests executed by ctest. 
This option is to be used if the CI runner executes the tests with ctest -L RANK_" + OFF +) + +# On some machines, tests using multiple ranks + oversubscribing run +# significantly faster when threads are not pinned. +option(DLAF_TEST_THREAD_BINDING_ENABLED "If OFF disables pika thread binding." ON) + +# If DLAF_CI_RUNNER_USES_MPIRUN=on we don't want to use any preset, so we just go for the custom one +# without setting any variable. +if(DLAF_CI_RUNNER_USES_MPIRUN) + set(DLAF_MPI_PRESET "custom" CACHE STRING "" FORCE) +endif() + # --------------------------------------------------------------------------- # mini Apps # --------------------------------------------------------------------------- diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3a18394716..389cb4b0e9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -8,25 +8,6 @@ # SPDX-License-Identifier: BSD-3-Clause # -set(DLAF_PRESET_OPTIONS "plain-mpi" "slurm" "custom") -set(DLAF_MPI_PRESET "plain-mpi" CACHE STRING "Select a preset to use") -set_property(CACHE DLAF_MPI_PRESET PROPERTY STRINGS ${DLAF_PRESET_OPTIONS}) -option( - DLAF_CI_RUNNER_USES_MPIRUN - "Remove mpiexec command for tests executed by ctest. This option is to be used if the CI runner executes the tests with ctest -L RANK_" - OFF -) - -# On some machines, tests using multiple ranks + oversubscribing run -# significantly faster when threads are not pinned. -option(DLAF_TEST_THREAD_BINDING_ENABLED "If OFF disables pika thread binding." ON) - -# If DLAF_CI_RUNNER_USES_MPIRUN=on we don't want to use any preset, so we just go for the custom one -# without setting any variable. 
-if(DLAF_CI_RUNNER_USES_MPIRUN) - set(DLAF_MPI_PRESET "custom" CACHE STRING "" FORCE) -endif() - # if a preset has been selected and it has been changed from previous configurations if(NOT DLAF_MPI_PRESET STREQUAL _DLAF_MPI_PRESET) From 8b5067b8bb6a603a18ec74c763db0485b16edc8a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 8 Apr 2024 17:32:16 +0200 Subject: [PATCH 06/33] Add category label to unit tests Use category label to split up CI jobs (e.g. by miniapp or unit test). --- .cmake-format.py | 2 ++ ci/ctest_to_gitlab.sh | 30 +++++++++++++++++++----------- cmake/DLAF_AddTest.cmake | 17 +++++++++++++---- miniapp/CMakeLists.txt | 2 ++ 4 files changed, 36 insertions(+), 15 deletions(-) diff --git a/.cmake-format.py b/.cmake-format.py index 8665261603..9654f5ced1 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -17,6 +17,7 @@ "ARGUMENTS": '*', "USE_MAIN": '1', "MPIRANKS": '1', + "CATEGORY": '1', } }, "DLAF_addTargetTest": { @@ -26,6 +27,7 @@ "ARGUMENTS": '*', "USE_MAIN": '1', "MPIRANKS": '1', + "CATEGORY": '1', } }, "DLAF_addMiniapp": { diff --git a/ci/ctest_to_gitlab.sh b/ci/ctest_to_gitlab.sh index fb5c6cf423..7cb0492612 100755 --- a/ci/ctest_to_gitlab.sh +++ b/ci/ctest_to_gitlab.sh @@ -47,7 +47,7 @@ upload_reports: " JOB_TEMPLATE=" -{{LABEL}}: +{{CATEGORY_LABEL}}_{{RANK_LABEL}}: stage: test extends: .daint variables: @@ -60,7 +60,7 @@ JOB_TEMPLATE=" USE_MPI: 'YES' DISABLE_AFTER_SCRIPT: 'YES' DLAF_HDF5_TEST_OUTPUT_PATH: \$CI_PROJECT_DIR - script: mpi-ctest -L {{LABEL}} + script: mpi-ctest -L {{CATEGORY_LABEL}} -L {{RANK_LABEL}} artifacts: paths: - codecov-reports/" @@ -85,7 +85,7 @@ variables: " JOB_TEMPLATE=" -{{LABEL}}: +{{CATEGORY_LABEL}}_{{RANK_LABEL}}: stage: test extends: .daint variables: @@ -98,20 +98,28 @@ JOB_TEMPLATE=" USE_MPI: 'YES' DISABLE_AFTER_SCRIPT: 'YES' DLAF_HDF5_TEST_OUTPUT_PATH: \$CI_PROJECT_DIR - script: mpi-ctest -L {{LABEL}}" + script: mpi-ctest -L {{CATEGORY_LABEL}} -L {{RANK_LABEL}}" fi JOBS="" -for label in `ctest 
--print-labels | egrep -o "RANK_[1-9][0-9]?"`; do - N=`echo "$label" | sed "s/RANK_//"` - C=$(( THREADS_PER_NODE / N )) +for rank_label in `ctest --print-labels | egrep -o "RANK_[1-9][0-9]?"`; do + for category_label in `ctest --print-labels | egrep -o "CATEGORY_[A-Z]+"`; do + N=`echo "$rank_label" | sed "s/RANK_//"` + C=$(( THREADS_PER_NODE / N )) - JOB=`echo "$JOB_TEMPLATE" | sed "s|{{LABEL}}|$label|g" \ - | sed "s|{{NTASKS}}|$N|g" \ - | sed "s|{{CPUS_PER_TASK}}|$C|g"` + # Skip label combinations that match no tests + if ! ctest -N -L $category_label -L $rank_label | egrep --quiet "^Total Tests: [1-9][0-9]?$"; then + continue + fi - JOBS="$JOBS$JOB" + JOB=`echo "$JOB_TEMPLATE" | sed "s|{{CATEGORY_LABEL}}|$category_label|g" \ + | sed "s|{{RANK_LABEL}}|$rank_label|g" \ + | sed "s|{{NTASKS}}|$N|g" \ + | sed "s|{{CPUS_PER_TASK}}|$C|g"` + + JOBS="$JOBS$JOB" + done done echo "${BASE_TEMPLATE/'{{JOBS}}'/$JOBS}" diff --git a/cmake/DLAF_AddTest.cmake b/cmake/DLAF_AddTest.cmake index a2a4f0c5f2..089cf60fb8 100644 --- a/cmake/DLAF_AddTest.cmake +++ b/cmake/DLAF_AddTest.cmake @@ -15,6 +15,7 @@ # [LIBRARIES ] # [MPIRANKS ] # [USE_MAIN {PLAIN | PIKA | MPI | MPIPIKA}] +# [CATEGORY ] # ) # # At least one source file has to be specified, while other parameters are optional. 
@@ -61,7 +62,7 @@ endfunction() function(DLAF_addTargetTest test_target_name) set(options "") - set(oneValueArgs MPIRANKS USE_MAIN) + set(oneValueArgs CATEGORY MPIRANKS USE_MAIN) set(multiValueArgs ARGUMENTS) cmake_parse_arguments(DLAF_ATT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -93,6 +94,14 @@ function(DLAF_addTargetTest test_target_name) message(FATAL_ERROR "USE_MAIN=${DLAF_ATT_USE_MAIN} is not a supported option") endif() + set(_TEST_LABELS) + + if(NOT DLAF_ATT_CATEGORY) + set(DLAF_ATT_CATEGORY "UNIT") + endif() + + list(APPEND _TEST_LABELS "CATEGORY_${DLAF_ATT_CATEGORY}") + if(IS_AN_MPI_TEST) if(NOT DLAF_ATT_MPIRANKS) message(FATAL_ERROR "You are asking for an MPI external main without specifying MPIRANKS") @@ -151,12 +160,12 @@ function(DLAF_addTargetTest test_target_name) ${DLAF_TEST_POSTFLAGS} ${MPIEXEC_POSTFLAGS} ) endif() - set(_TEST_LABEL "RANK_${DLAF_ATT_MPIRANKS}") + list(APPEND _TEST_LABELS "RANK_${DLAF_ATT_MPIRANKS}") else() # ----- Classic test set(_TEST_COMMAND ${DLAF_TEST_PREFLAGS} $ ${DLAF_TEST_POSTFLAGS}) - set(_TEST_LABEL "RANK_1") + list(APPEND _TEST_LABELS "RANK_1") endif() if(IS_AN_PIKA_TEST) @@ -214,7 +223,7 @@ function(DLAF_addTargetTest test_target_name) endif() add_test(NAME ${test_target_name} COMMAND ${_TEST_COMMAND} ${_TEST_ARGUMENTS}) - set_tests_properties(${test_target_name} PROPERTIES LABELS "${_TEST_LABEL}") + set_tests_properties(${test_target_name} PROPERTIES LABELS "${_TEST_LABELS}") endfunction() function(DLAF_addTest test_target_name) diff --git a/miniapp/CMakeLists.txt b/miniapp/CMakeLists.txt index c84af071cc..c1aabde73f 100644 --- a/miniapp/CMakeLists.txt +++ b/miniapp/CMakeLists.txt @@ -46,6 +46,8 @@ if(DLAF_BUILD_TESTING) --grid-rows=3 --grid-cols=2 --check=all + CATEGORY + MINIAPP ) DLAF_addTargetTest(miniapp_cholesky ${miniapp_test_args}) DLAF_addTargetTest(miniapp_gen_to_std ${miniapp_test_args}) From 01218fbc0e589e5e46c4c4e5888b1bb4c5cf2fe5 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: 
Tue, 9 Apr 2024 09:55:36 +0200 Subject: [PATCH 07/33] Fix TEST_BINARIES variable in codecov CI configuration --- ci/docker/codecov.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/codecov.Dockerfile b/ci/docker/codecov.Dockerfile index 3673b680ac..16d1c4c6a8 100644 --- a/ci/docker/codecov.Dockerfile +++ b/ci/docker/codecov.Dockerfile @@ -32,7 +32,7 @@ RUN spack repo rm --scope site dlaf && \ # Prune and bundle binaries RUN mkdir ${BUILD}-tmp && cd ${BUILD} && \ - export TEST_BINARIES=`PATH=${SOURCE}/ci:$PATH ctest --show-only=json-v1 | jq '.tests | map(.command | .[] | select(contains("check-threads") | not)) | .[]' | tr -d \"` && \ + export TEST_BINARIES=`PATH=${SOURCE}/ci:$PATH ctest --show-only=json-v1 | jq --raw-output '.tests | map(.command | .[] | select(startswith("/"))) | .[]' | tr -d \"` && \ echo "Binary sizes:" && \ ls -lh ${TEST_BINARIES} && \ ls -lh src/lib* && \ From ff5bc6122c7d51eb24f57ae0facf5956108fe791 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 9 Apr 2024 14:38:48 +0200 Subject: [PATCH 08/33] Fix gtest linking for tests --- cmake/DLAF_AddTest.cmake | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/cmake/DLAF_AddTest.cmake b/cmake/DLAF_AddTest.cmake index 089cf60fb8..c737c01c55 100644 --- a/cmake/DLAF_AddTest.cmake +++ b/cmake/DLAF_AddTest.cmake @@ -73,22 +73,16 @@ function(DLAF_addTargetTest test_target_name) set(IS_AN_MPI_TEST FALSE) set(IS_AN_PIKA_TEST FALSE) - if(NOT DLAF_ATT_USE_MAIN) - set(_gtest_tgt gtest) - elseif(DLAF_ATT_USE_MAIN STREQUAL PLAIN) - set(_gtest_tgt gtest_main) + if(NOT DLAF_ATT_USE_MAIN OR DLAF_ATT_USE_MAIN STREQUAL PLAIN) + elseif(DLAF_ATT_USE_MAIN STREQUAL PIKA) - set(_gtest_tgt DLAF_gtest_pika_main) set(IS_AN_PIKA_TEST TRUE) elseif(DLAF_ATT_USE_MAIN STREQUAL MPI) - set(_gtest_tgt DLAF_gtest_mpi_main) set(IS_AN_MPI_TEST TRUE) elseif(DLAF_ATT_USE_MAIN STREQUAL MPIPIKA) - set(_gtest_tgt DLAF_gtest_mpipika_main) set(IS_AN_MPI_TEST 
TRUE) set(IS_AN_PIKA_TEST TRUE) elseif(DLAF_ATT_USE_MAIN STREQUAL CAPI) - set(_gtest_tgt DLAF_gtest_mpi_main) set(IS_AN_MPI_TEST TRUE) else() message(FATAL_ERROR "USE_MAIN=${DLAF_ATT_USE_MAIN} is not a supported option") @@ -241,6 +235,22 @@ function(DLAF_addTest test_target_name) message(FATAL_ERROR "No sources specified for this test") endif() + if(NOT DLAF_AT_USE_MAIN) + set(_gtest_tgt gtest) + elseif(DLAF_AT_USE_MAIN STREQUAL PLAIN) + set(_gtest_tgt gtest_main) + elseif(DLAF_AT_USE_MAIN STREQUAL PIKA) + set(_gtest_tgt DLAF_gtest_pika_main) + elseif(DLAF_AT_USE_MAIN STREQUAL MPI) + set(_gtest_tgt DLAF_gtest_mpi_main) + elseif(DLAF_AT_USE_MAIN STREQUAL MPIPIKA) + set(_gtest_tgt DLAF_gtest_mpipika_main) + elseif(DLAF_AT_USE_MAIN STREQUAL CAPI) + set(_gtest_tgt DLAF_gtest_mpi_main) + else() + message(FATAL_ERROR "USE_MAIN=${DLAF_AT_USE_MAIN} is not a supported option") + endif() + ### Test executable target add_executable(${test_target_name} ${DLAF_AT_SOURCES}) target_link_libraries( From e754585a7d8d543ef79bcf6f3be07edefe4e516d Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 10 Apr 2024 16:26:08 +0200 Subject: [PATCH 09/33] Use pika::wait instead of waitLocalTiles in miniapps --- miniapp/miniapp_band_to_tridiag.cpp | 3 +-- miniapp/miniapp_bt_band_to_tridiag.cpp | 2 +- miniapp/miniapp_bt_reduction_to_band.cpp | 2 +- miniapp/miniapp_cholesky.cpp | 2 +- miniapp/miniapp_communication.cpp | 3 +-- miniapp/miniapp_eigensolver.cpp | 2 +- miniapp/miniapp_gen_eigensolver.cpp | 2 +- miniapp/miniapp_gen_to_std.cpp | 2 +- miniapp/miniapp_reduction_to_band.cpp | 3 +-- miniapp/miniapp_triangular_multiplication.cpp | 3 +-- miniapp/miniapp_triangular_solver.cpp | 3 +-- miniapp/miniapp_tridiag_solver.cpp | 4 +--- 12 files changed, 12 insertions(+), 19 deletions(-) diff --git a/miniapp/miniapp_band_to_tridiag.cpp b/miniapp/miniapp_band_to_tridiag.cpp index bace372c48..ab06b7cf8b 100644 --- a/miniapp/miniapp_band_to_tridiag.cpp +++ b/miniapp/miniapp_band_to_tridiag.cpp @@ 
-127,8 +127,7 @@ struct BandToTridiagMiniapp { auto [trid, hhr] = bench(); // wait and barrier for all ranks - trid.waitLocalTiles(); - hhr.waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_bt_band_to_tridiag.cpp b/miniapp/miniapp_bt_band_to_tridiag.cpp index 0b89769198..47dd8fbe9f 100644 --- a/miniapp/miniapp_bt_band_to_tridiag.cpp +++ b/miniapp/miniapp_bt_band_to_tridiag.cpp @@ -137,7 +137,7 @@ struct BacktransformBandToTridiagMiniapp { comm_grid, opts.b, mat_e.get(), mat_hh); // wait and barrier for all ranks - mat_e.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_bt_reduction_to_band.cpp b/miniapp/miniapp_bt_reduction_to_band.cpp index 8761bcd91c..35c1c51f02 100644 --- a/miniapp/miniapp_bt_reduction_to_band.cpp +++ b/miniapp/miniapp_bt_reduction_to_band.cpp @@ -157,7 +157,7 @@ struct BacktransformBandToTridiagMiniapp { comm_grid, opts.b, mat_e.get(), mat_hh.get(), mat_taus); // wait and barrier for all ranks - mat_e.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_cholesky.cpp b/miniapp/miniapp_cholesky.cpp index 947835f955..3ad6f8b926 100644 --- a/miniapp/miniapp_cholesky.cpp +++ b/miniapp/miniapp_cholesky.cpp @@ -148,7 +148,7 @@ struct choleskyMiniapp { matrix.get()); // wait and barrier for all ranks - matrix.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_communication.cpp b/miniapp/miniapp_communication.cpp index dd147d6d26..aaa09d86f0 100644 --- a/miniapp/miniapp_communication.cpp +++ b/miniapp/miniapp_communication.cpp @@ -172,8 +172,7 @@ void benchmark_all_reduce(int64_t run_index, const Options& opts, Communicator& return scheduleAllReduce(std::move(comm), MPI_SUM, 
std::move(ro_tile), std::move(rw_tile)); }; benchmark_ro_rw(pcomm, matrix, matrix_out, allred); - matrix.waitLocalTiles(); - matrix_out.waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); auto t = timeit.elapsed(); diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index 11abddf5d5..06457d973f 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -160,7 +160,7 @@ struct EigensolverMiniapp { auto [eigenvalues, eigenvectors] = bench(); // wait and barrier for all ranks - eigenvectors.waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); double elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index fe27104fbf..2b7c1e6a53 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -192,7 +192,7 @@ struct GenEigensolverMiniapp { auto [eigenvalues, eigenvectors] = bench(); // wait and barrier for all ranks - eigenvectors.waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); double elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_gen_to_std.cpp b/miniapp/miniapp_gen_to_std.cpp index 8fa3a8c70d..587e0689d2 100644 --- a/miniapp/miniapp_gen_to_std.cpp +++ b/miniapp/miniapp_gen_to_std.cpp @@ -139,7 +139,7 @@ struct GenToStdMiniapp { comm_grid, opts.uplo, matrix_a.get(), matrix_b.get()); // wait and barrier for all ranks - matrix_a.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_reduction_to_band.cpp b/miniapp/miniapp_reduction_to_band.cpp index 299af230b0..aeca036ec9 100644 --- a/miniapp/miniapp_reduction_to_band.cpp +++ b/miniapp/miniapp_reduction_to_band.cpp @@ -157,8 +157,7 @@ struct reductionToBandMiniapp { auto taus = bench(); // wait and barrier for all ranks - matrix.waitLocalTiles(); - taus.waitLocalTiles(); + pika::wait(); 
DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_triangular_multiplication.cpp b/miniapp/miniapp_triangular_multiplication.cpp index 93e07fc7b4..fcfe9f38e7 100644 --- a/miniapp/miniapp_triangular_multiplication.cpp +++ b/miniapp/miniapp_triangular_multiplication.cpp @@ -131,8 +131,7 @@ struct triangularMultiplicationMiniapp { MatrixMirrorType b(bh); auto sync_barrier = [&]() { - a.get().waitLocalTiles(); - b.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); }; diff --git a/miniapp/miniapp_triangular_solver.cpp b/miniapp/miniapp_triangular_solver.cpp index 3ebdb9440e..42968f56d8 100644 --- a/miniapp/miniapp_triangular_solver.cpp +++ b/miniapp/miniapp_triangular_solver.cpp @@ -131,8 +131,7 @@ struct triangularSolverMiniapp { MatrixMirrorType b(bh); auto sync_barrier = [&]() { - a.get().waitLocalTiles(); - b.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); }; diff --git a/miniapp/miniapp_tridiag_solver.cpp b/miniapp/miniapp_tridiag_solver.cpp index 3457748c62..6dd5845b97 100644 --- a/miniapp/miniapp_tridiag_solver.cpp +++ b/miniapp/miniapp_tridiag_solver.cpp @@ -151,9 +151,7 @@ struct TridiagSolverMiniapp { tridiagonal_eigensolver(comm_grid, tridiag, evals_mirror.get(), evecs_mirror.get()); // wait and barrier for all ranks - tridiag.waitLocalTiles(); - evals_mirror.get().waitLocalTiles(); - evecs_mirror.get().waitLocalTiles(); + pika::wait(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); } From 944e6bdd4b3e90e1fedb99709940bb833e939006 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 23 Apr 2024 09:56:38 +0200 Subject: [PATCH 10/33] Update GitLab CI test job names to exclude CATEGORY_ prefix --- ci/ctest_to_gitlab.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/ctest_to_gitlab.sh b/ci/ctest_to_gitlab.sh index 7cb0492612..52e6716e87 100755 --- a/ci/ctest_to_gitlab.sh +++ 
b/ci/ctest_to_gitlab.sh @@ -47,7 +47,7 @@ upload_reports: " JOB_TEMPLATE=" -{{CATEGORY_LABEL}}_{{RANK_LABEL}}: +{{CATEGORY_LABEL_NOPREFIX}}_{{RANK_LABEL}}: stage: test extends: .daint variables: @@ -85,7 +85,7 @@ variables: " JOB_TEMPLATE=" -{{CATEGORY_LABEL}}_{{RANK_LABEL}}: +{{CATEGORY_LABEL_NOPREFIX}}_{{RANK_LABEL}}: stage: test extends: .daint variables: @@ -113,7 +113,9 @@ for rank_label in `ctest --print-labels | egrep -o "RANK_[1-9][0-9]?"`; do continue fi - JOB=`echo "$JOB_TEMPLATE" | sed "s|{{CATEGORY_LABEL}}|$category_label|g" \ + category_label_noprefix=`echo "$category_label" | sed "s/CATEGORY_//"` + JOB=`echo "$JOB_TEMPLATE" | sed "s|{{CATEGORY_LABEL_NOPREFIX}}|$category_label_noprefix|g" \ + | sed "s|{{CATEGORY_LABEL}}|$category_label|g" \ | sed "s|{{RANK_LABEL}}|$rank_label|g" \ | sed "s|{{NTASKS}}|$N|g" \ | sed "s|{{CPUS_PER_TASK}}|$C|g"` From f081e0ef90c18270cb8d22158b5786e931925ba3 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 27 May 2024 10:56:47 +0200 Subject: [PATCH 11/33] Revert "Use pika::wait instead of waitLocalTiles in miniapps" This reverts commit 0755556c53fdcc2218e5c34d91edd0299d845e73. 
--- miniapp/miniapp_band_to_tridiag.cpp | 3 ++- miniapp/miniapp_bt_band_to_tridiag.cpp | 2 +- miniapp/miniapp_bt_reduction_to_band.cpp | 2 +- miniapp/miniapp_cholesky.cpp | 2 +- miniapp/miniapp_communication.cpp | 3 ++- miniapp/miniapp_eigensolver.cpp | 2 +- miniapp/miniapp_gen_eigensolver.cpp | 2 +- miniapp/miniapp_gen_to_std.cpp | 2 +- miniapp/miniapp_reduction_to_band.cpp | 3 ++- miniapp/miniapp_triangular_multiplication.cpp | 3 ++- miniapp/miniapp_triangular_solver.cpp | 3 ++- miniapp/miniapp_tridiag_solver.cpp | 4 +++- 12 files changed, 19 insertions(+), 12 deletions(-) diff --git a/miniapp/miniapp_band_to_tridiag.cpp b/miniapp/miniapp_band_to_tridiag.cpp index ab06b7cf8b..bace372c48 100644 --- a/miniapp/miniapp_band_to_tridiag.cpp +++ b/miniapp/miniapp_band_to_tridiag.cpp @@ -127,7 +127,8 @@ struct BandToTridiagMiniapp { auto [trid, hhr] = bench(); // wait and barrier for all ranks - pika::wait(); + trid.waitLocalTiles(); + hhr.waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_bt_band_to_tridiag.cpp b/miniapp/miniapp_bt_band_to_tridiag.cpp index 47dd8fbe9f..0b89769198 100644 --- a/miniapp/miniapp_bt_band_to_tridiag.cpp +++ b/miniapp/miniapp_bt_band_to_tridiag.cpp @@ -137,7 +137,7 @@ struct BacktransformBandToTridiagMiniapp { comm_grid, opts.b, mat_e.get(), mat_hh); // wait and barrier for all ranks - pika::wait(); + mat_e.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_bt_reduction_to_band.cpp b/miniapp/miniapp_bt_reduction_to_band.cpp index 35c1c51f02..8761bcd91c 100644 --- a/miniapp/miniapp_bt_reduction_to_band.cpp +++ b/miniapp/miniapp_bt_reduction_to_band.cpp @@ -157,7 +157,7 @@ struct BacktransformBandToTridiagMiniapp { comm_grid, opts.b, mat_e.get(), mat_hh.get(), mat_taus); // wait and barrier for all ranks - pika::wait(); + mat_e.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); 
elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_cholesky.cpp b/miniapp/miniapp_cholesky.cpp index 3ad6f8b926..947835f955 100644 --- a/miniapp/miniapp_cholesky.cpp +++ b/miniapp/miniapp_cholesky.cpp @@ -148,7 +148,7 @@ struct choleskyMiniapp { matrix.get()); // wait and barrier for all ranks - pika::wait(); + matrix.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_communication.cpp b/miniapp/miniapp_communication.cpp index aaa09d86f0..dd147d6d26 100644 --- a/miniapp/miniapp_communication.cpp +++ b/miniapp/miniapp_communication.cpp @@ -172,7 +172,8 @@ void benchmark_all_reduce(int64_t run_index, const Options& opts, Communicator& return scheduleAllReduce(std::move(comm), MPI_SUM, std::move(ro_tile), std::move(rw_tile)); }; benchmark_ro_rw(pcomm, matrix, matrix_out, allred); - pika::wait(); + matrix.waitLocalTiles(); + matrix_out.waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); auto t = timeit.elapsed(); diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index 06457d973f..11abddf5d5 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -160,7 +160,7 @@ struct EigensolverMiniapp { auto [eigenvalues, eigenvectors] = bench(); // wait and barrier for all ranks - pika::wait(); + eigenvectors.waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); double elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index 2b7c1e6a53..fe27104fbf 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -192,7 +192,7 @@ struct GenEigensolverMiniapp { auto [eigenvalues, eigenvectors] = bench(); // wait and barrier for all ranks - pika::wait(); + eigenvectors.waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); double elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_gen_to_std.cpp 
b/miniapp/miniapp_gen_to_std.cpp index 587e0689d2..8fa3a8c70d 100644 --- a/miniapp/miniapp_gen_to_std.cpp +++ b/miniapp/miniapp_gen_to_std.cpp @@ -139,7 +139,7 @@ struct GenToStdMiniapp { comm_grid, opts.uplo, matrix_a.get(), matrix_b.get()); // wait and barrier for all ranks - pika::wait(); + matrix_a.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_reduction_to_band.cpp b/miniapp/miniapp_reduction_to_band.cpp index aeca036ec9..299af230b0 100644 --- a/miniapp/miniapp_reduction_to_band.cpp +++ b/miniapp/miniapp_reduction_to_band.cpp @@ -157,7 +157,8 @@ struct reductionToBandMiniapp { auto taus = bench(); // wait and barrier for all ranks - pika::wait(); + matrix.waitLocalTiles(); + taus.waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); diff --git a/miniapp/miniapp_triangular_multiplication.cpp b/miniapp/miniapp_triangular_multiplication.cpp index fcfe9f38e7..93e07fc7b4 100644 --- a/miniapp/miniapp_triangular_multiplication.cpp +++ b/miniapp/miniapp_triangular_multiplication.cpp @@ -131,7 +131,8 @@ struct triangularMultiplicationMiniapp { MatrixMirrorType b(bh); auto sync_barrier = [&]() { - pika::wait(); + a.get().waitLocalTiles(); + b.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); }; diff --git a/miniapp/miniapp_triangular_solver.cpp b/miniapp/miniapp_triangular_solver.cpp index 42968f56d8..3ebdb9440e 100644 --- a/miniapp/miniapp_triangular_solver.cpp +++ b/miniapp/miniapp_triangular_solver.cpp @@ -131,7 +131,8 @@ struct triangularSolverMiniapp { MatrixMirrorType b(bh); auto sync_barrier = [&]() { - pika::wait(); + a.get().waitLocalTiles(); + b.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); }; diff --git a/miniapp/miniapp_tridiag_solver.cpp b/miniapp/miniapp_tridiag_solver.cpp index 6dd5845b97..3457748c62 100644 --- a/miniapp/miniapp_tridiag_solver.cpp +++ b/miniapp/miniapp_tridiag_solver.cpp @@ -151,7 
+151,9 @@ struct TridiagSolverMiniapp { tridiagonal_eigensolver(comm_grid, tridiag, evals_mirror.get(), evecs_mirror.get()); // wait and barrier for all ranks - pika::wait(); + tridiag.waitLocalTiles(); + evals_mirror.get().waitLocalTiles(); + evecs_mirror.get().waitLocalTiles(); DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); } From c5daa65c57c554a58f6820f70e5f465486c1c9a1 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 27 May 2024 11:20:32 +0200 Subject: [PATCH 12/33] Wait for all communicators in miniapp_cholesky --- miniapp/miniapp_cholesky.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/miniapp/miniapp_cholesky.cpp b/miniapp/miniapp_cholesky.cpp index 947835f955..8927e7a8a8 100644 --- a/miniapp/miniapp_cholesky.cpp +++ b/miniapp/miniapp_cholesky.cpp @@ -149,6 +149,11 @@ struct choleskyMiniapp { // wait and barrier for all ranks matrix.get().waitLocalTiles(); + for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { + sync_wait(comm_grid.full_communicator_pipeline().exclusive()); + sync_wait(comm_grid.row_communicator_pipeline().exclusive()); + sync_wait(comm_grid.col_communicator_pipeline().exclusive()); + } DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); From 63eb15052b7874004b87bb26b53fbb36d6bf1615 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 27 May 2024 11:41:17 +0200 Subject: [PATCH 13/33] Wait for all communicators in miniapp_reduction_to_band --- miniapp/miniapp_reduction_to_band.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/miniapp/miniapp_reduction_to_band.cpp b/miniapp/miniapp_reduction_to_band.cpp index 299af230b0..67cb16a678 100644 --- a/miniapp/miniapp_reduction_to_band.cpp +++ b/miniapp/miniapp_reduction_to_band.cpp @@ -97,6 +97,8 @@ struct Options struct reductionToBandMiniapp { template static void run(const Options& opts) { + using pika::this_thread::experimental::sync_wait; + using namespace dlaf; using dlaf::SizeType; using 
dlaf::comm::Communicator; @@ -159,6 +161,11 @@ struct reductionToBandMiniapp { // wait and barrier for all ranks matrix.waitLocalTiles(); taus.waitLocalTiles(); + for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { + sync_wait(comm_grid.full_communicator_pipeline().exclusive()); + sync_wait(comm_grid.row_communicator_pipeline().exclusive()); + sync_wait(comm_grid.col_communicator_pipeline().exclusive()); + } DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); elapsed_time = timeit.elapsed(); From bf6ee7b685e1e1f91c0c5b1bedd128c2ded57691 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 27 May 2024 12:37:15 +0200 Subject: [PATCH 14/33] Wait for all communicators in miniapp_eigensolver --- miniapp/miniapp_eigensolver.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index 11abddf5d5..4e9b346902 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -161,6 +161,11 @@ struct EigensolverMiniapp { // wait and barrier for all ranks eigenvectors.waitLocalTiles(); + for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { + sync_wait(comm_grid.full_communicator_pipeline().exclusive()); + sync_wait(comm_grid.row_communicator_pipeline().exclusive()); + sync_wait(comm_grid.col_communicator_pipeline().exclusive()); + } DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); double elapsed_time = timeit.elapsed(); From fb29737c6f951835c1f4502935cf21578a6dbf0a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 27 May 2024 13:35:55 +0200 Subject: [PATCH 15/33] Wait for all communicators in miniapp_gen_eigensolver --- miniapp/miniapp_gen_eigensolver.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index fe27104fbf..8f416f903c 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -193,6 +193,11 @@ struct GenEigensolverMiniapp { // wait and barrier for 
all ranks eigenvectors.waitLocalTiles(); + for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { + sync_wait(comm_grid.full_communicator_pipeline().exclusive()); + sync_wait(comm_grid.row_communicator_pipeline().exclusive()); + sync_wait(comm_grid.col_communicator_pipeline().exclusive()); + } DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); double elapsed_time = timeit.elapsed(); From 0abe465bb8c514a85b8dfb4f6e6a0c7bff067ecc Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 28 May 2024 09:49:18 +0200 Subject: [PATCH 16/33] Wait for all communicators in miniapp_triangular_multiplication --- miniapp/miniapp_triangular_multiplication.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/miniapp/miniapp_triangular_multiplication.cpp b/miniapp/miniapp_triangular_multiplication.cpp index 93e07fc7b4..30b8704374 100644 --- a/miniapp/miniapp_triangular_multiplication.cpp +++ b/miniapp/miniapp_triangular_multiplication.cpp @@ -51,6 +51,7 @@ using dlaf::comm::CommunicatorGrid; using dlaf::common::Ordering; using dlaf::matrix::Matrix; using dlaf::matrix::MatrixMirror; +using pika::this_thread::experimental::sync_wait; struct Options : dlaf::miniapp::MiniappOptions { @@ -133,6 +134,11 @@ struct triangularMultiplicationMiniapp { auto sync_barrier = [&]() { a.get().waitLocalTiles(); b.get().waitLocalTiles(); + for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { + sync_wait(comm_grid.full_communicator_pipeline().exclusive()); + sync_wait(comm_grid.row_communicator_pipeline().exclusive()); + sync_wait(comm_grid.col_communicator_pipeline().exclusive()); + } DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); }; From eee19fd001c529d94158b16f7ddda1bd18e24c09 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Sep 2024 11:56:44 +0200 Subject: [PATCH 17/33] Add helper member function for waiting for all communication to complete on CommunicatorGrid --- .../dlaf/communication/communicator_grid.h | 5 ++++ miniapp/miniapp_band_to_tridiag.cpp | 2 +- 
miniapp/miniapp_bt_band_to_tridiag.cpp | 2 +- miniapp/miniapp_bt_reduction_to_band.cpp | 2 +- miniapp/miniapp_cholesky.cpp | 7 +---- miniapp/miniapp_eigensolver.cpp | 7 +---- miniapp/miniapp_gen_eigensolver.cpp | 7 +---- miniapp/miniapp_gen_to_std.cpp | 2 +- miniapp/miniapp_reduction_to_band.cpp | 7 +---- miniapp/miniapp_triangular_multiplication.cpp | 7 +---- miniapp/miniapp_triangular_solver.cpp | 2 +- miniapp/miniapp_tridiag_solver.cpp | 2 +- src/communication/communicator_grid.cpp | 28 +++++++++++++++++++ 13 files changed, 44 insertions(+), 36 deletions(-) diff --git a/include/dlaf/communication/communicator_grid.h b/include/dlaf/communication/communicator_grid.h index 4099fb5945..a2c7f973cf 100644 --- a/include/dlaf/communication/communicator_grid.h +++ b/include/dlaf/communication/communicator_grid.h @@ -131,6 +131,11 @@ class CommunicatorGrid { return col_communicator_pipeline(); } + /// Synchronization barrier for all communicators in the grid + /// + /// This function returns when all communications on the communicators in the grid have completed. + void wait_all_communicators(); + /// Prints information about the CommunicationGrid. 
friend std::ostream& operator<<(std::ostream& out, const CommunicatorGrid& grid) { return out << "position=" << grid.position_ << ", size=" << grid.grid_size_; diff --git a/miniapp/miniapp_band_to_tridiag.cpp b/miniapp/miniapp_band_to_tridiag.cpp index bace372c48..b820515453 100644 --- a/miniapp/miniapp_band_to_tridiag.cpp +++ b/miniapp/miniapp_band_to_tridiag.cpp @@ -129,7 +129,7 @@ struct BandToTridiagMiniapp { // wait and barrier for all ranks trid.waitLocalTiles(); hhr.waitLocalTiles(); - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_bt_band_to_tridiag.cpp b/miniapp/miniapp_bt_band_to_tridiag.cpp index 0b89769198..5b2830436a 100644 --- a/miniapp/miniapp_bt_band_to_tridiag.cpp +++ b/miniapp/miniapp_bt_band_to_tridiag.cpp @@ -138,7 +138,7 @@ struct BacktransformBandToTridiagMiniapp { // wait and barrier for all ranks mat_e.get().waitLocalTiles(); - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_bt_reduction_to_band.cpp b/miniapp/miniapp_bt_reduction_to_band.cpp index 8761bcd91c..8fcc85d846 100644 --- a/miniapp/miniapp_bt_reduction_to_band.cpp +++ b/miniapp/miniapp_bt_reduction_to_band.cpp @@ -158,7 +158,7 @@ struct BacktransformBandToTridiagMiniapp { // wait and barrier for all ranks mat_e.get().waitLocalTiles(); - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_cholesky.cpp b/miniapp/miniapp_cholesky.cpp index 8927e7a8a8..362a96aad1 100644 --- a/miniapp/miniapp_cholesky.cpp +++ b/miniapp/miniapp_cholesky.cpp @@ -149,12 +149,7 @@ struct choleskyMiniapp { // wait and barrier for all ranks matrix.get().waitLocalTiles(); - for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { - sync_wait(comm_grid.full_communicator_pipeline().exclusive()); - 
sync_wait(comm_grid.row_communicator_pipeline().exclusive()); - sync_wait(comm_grid.col_communicator_pipeline().exclusive()); - } - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index 4e9b346902..296f52d471 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -161,12 +161,7 @@ struct EigensolverMiniapp { // wait and barrier for all ranks eigenvectors.waitLocalTiles(); - for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { - sync_wait(comm_grid.full_communicator_pipeline().exclusive()); - sync_wait(comm_grid.row_communicator_pipeline().exclusive()); - sync_wait(comm_grid.col_communicator_pipeline().exclusive()); - } - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); double elapsed_time = timeit.elapsed(); #ifdef DLAF_WITH_HDF5 diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index 8f416f903c..20bc04e590 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -193,12 +193,7 @@ struct GenEigensolverMiniapp { // wait and barrier for all ranks eigenvectors.waitLocalTiles(); - for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { - sync_wait(comm_grid.full_communicator_pipeline().exclusive()); - sync_wait(comm_grid.row_communicator_pipeline().exclusive()); - sync_wait(comm_grid.col_communicator_pipeline().exclusive()); - } - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); double elapsed_time = timeit.elapsed(); #ifdef DLAF_WITH_HDF5 diff --git a/miniapp/miniapp_gen_to_std.cpp b/miniapp/miniapp_gen_to_std.cpp index 8fa3a8c70d..6b96e98252 100644 --- a/miniapp/miniapp_gen_to_std.cpp +++ b/miniapp/miniapp_gen_to_std.cpp @@ -140,7 +140,7 @@ struct GenToStdMiniapp { // wait and barrier for all ranks matrix_a.get().waitLocalTiles(); - 
DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_reduction_to_band.cpp b/miniapp/miniapp_reduction_to_band.cpp index 67cb16a678..280ede9624 100644 --- a/miniapp/miniapp_reduction_to_band.cpp +++ b/miniapp/miniapp_reduction_to_band.cpp @@ -161,12 +161,7 @@ struct reductionToBandMiniapp { // wait and barrier for all ranks matrix.waitLocalTiles(); taus.waitLocalTiles(); - for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { - sync_wait(comm_grid.full_communicator_pipeline().exclusive()); - sync_wait(comm_grid.row_communicator_pipeline().exclusive()); - sync_wait(comm_grid.col_communicator_pipeline().exclusive()); - } - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/miniapp/miniapp_triangular_multiplication.cpp b/miniapp/miniapp_triangular_multiplication.cpp index 30b8704374..f4a1995650 100644 --- a/miniapp/miniapp_triangular_multiplication.cpp +++ b/miniapp/miniapp_triangular_multiplication.cpp @@ -134,12 +134,7 @@ struct triangularMultiplicationMiniapp { auto sync_barrier = [&]() { a.get().waitLocalTiles(); b.get().waitLocalTiles(); - for (std::size_t i = 0; i < comm_grid.num_pipelines(); ++i) { - sync_wait(comm_grid.full_communicator_pipeline().exclusive()); - sync_wait(comm_grid.row_communicator_pipeline().exclusive()); - sync_wait(comm_grid.col_communicator_pipeline().exclusive()); - } - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); }; const T alpha = 2.0; diff --git a/miniapp/miniapp_triangular_solver.cpp b/miniapp/miniapp_triangular_solver.cpp index 3ebdb9440e..149c54567d 100644 --- a/miniapp/miniapp_triangular_solver.cpp +++ b/miniapp/miniapp_triangular_solver.cpp @@ -133,7 +133,7 @@ struct triangularSolverMiniapp { auto sync_barrier = [&]() { a.get().waitLocalTiles(); b.get().waitLocalTiles(); - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + 
comm_grid.wait_all_communicators(); }; const T alpha = 2.0; diff --git a/miniapp/miniapp_tridiag_solver.cpp b/miniapp/miniapp_tridiag_solver.cpp index 3457748c62..16d0789c84 100644 --- a/miniapp/miniapp_tridiag_solver.cpp +++ b/miniapp/miniapp_tridiag_solver.cpp @@ -154,7 +154,7 @@ struct TridiagSolverMiniapp { tridiag.waitLocalTiles(); evals_mirror.get().waitLocalTiles(); evecs_mirror.get().waitLocalTiles(); - DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); + comm_grid.wait_all_communicators(); elapsed_time = timeit.elapsed(); } diff --git a/src/communication/communicator_grid.cpp b/src/communication/communicator_grid.cpp index 61a1222ded..05da5d5d2e 100644 --- a/src/communication/communicator_grid.cpp +++ b/src/communication/communicator_grid.cpp @@ -9,10 +9,18 @@ // #include +#include +#include + +#include + +#include #include #include +#include #include +#include namespace dlaf { namespace comm { @@ -66,5 +74,25 @@ CommunicatorGrid::CommunicatorGrid(Communicator comm, IndexT_MPI nrows, IndexT_M return CommunicatorPipeline{col_.clone(), position_, grid_size_}; })); } + +void CommunicatorGrid::wait_all_communicators() { + using pika::execution::experimental::drop_value; + using pika::execution::experimental::unique_any_sender; + using pika::execution::experimental::when_all_vector; + using pika::this_thread::experimental::sync_wait; + + constexpr auto barrier = [](const Communicator& comm, MPI_Request* req) { + DLAF_MPI_CHECK_ERROR(MPI_Ibarrier(comm, req)); + }; + + std::vector<unique_any_sender<>> senders; + senders.reserve(3 * num_pipelines()); + for (std::size_t i = 0; i < num_pipelines(); ++i) { + senders.push_back(full_communicator_pipeline().exclusive() | internal::transformMPI(barrier)); + senders.push_back(row_communicator_pipeline().exclusive() | internal::transformMPI(barrier)); + senders.push_back(col_communicator_pipeline().exclusive() | internal::transformMPI(barrier)); + } + sync_wait(when_all_vector(std::move(senders))); +} } // namespace comm } // namespace dlaf From
9b71b313eca75c389852891029560245ffd0ae87 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 25 Sep 2024 20:53:42 +0200 Subject: [PATCH 18/33] Simplify check for no tests in ctest_to_gitlab.sh --- ci/ctest_to_gitlab.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/ctest_to_gitlab.sh b/ci/ctest_to_gitlab.sh index 52e6716e87..c13f69d53a 100755 --- a/ci/ctest_to_gitlab.sh +++ b/ci/ctest_to_gitlab.sh @@ -109,7 +109,7 @@ for rank_label in `ctest --print-labels | egrep -o "RANK_[1-9][0-9]?"`; do C=$(( THREADS_PER_NODE / N )) # Skip label combinations that match no tests - if ! ctest -N -L $category_label -L $rank_label | egrep --quiet "^Total Tests: [1-9][0-9]?$"; then + if [[ "$(ctest -N -L $category_label -L $rank_label | tail -n1)" == "Total Tests: 0" ]]; then continue fi From a49c151f29c8e52fe7b7c75859f2afdef2145943 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 27 Sep 2024 13:44:22 +0200 Subject: [PATCH 19/33] Remove unused using sync_wait --- miniapp/miniapp_reduction_to_band.cpp | 2 -- miniapp/miniapp_triangular_multiplication.cpp | 1 - 2 files changed, 3 deletions(-) diff --git a/miniapp/miniapp_reduction_to_band.cpp b/miniapp/miniapp_reduction_to_band.cpp index 280ede9624..306bb473a8 100644 --- a/miniapp/miniapp_reduction_to_band.cpp +++ b/miniapp/miniapp_reduction_to_band.cpp @@ -97,8 +97,6 @@ struct Options struct reductionToBandMiniapp { template static void run(const Options& opts) { - using pika::this_thread::experimental::sync_wait; - using namespace dlaf; using dlaf::SizeType; using dlaf::comm::Communicator; diff --git a/miniapp/miniapp_triangular_multiplication.cpp b/miniapp/miniapp_triangular_multiplication.cpp index f4a1995650..4078a0fd48 100644 --- a/miniapp/miniapp_triangular_multiplication.cpp +++ b/miniapp/miniapp_triangular_multiplication.cpp @@ -51,7 +51,6 @@ using dlaf::comm::CommunicatorGrid; using dlaf::common::Ordering; using dlaf::matrix::Matrix; using dlaf::matrix::MatrixMirror; -using 
pika::this_thread::experimental::sync_wait; struct Options : dlaf::miniapp::MiniappOptions { From a44a3f64d3fbe5475f4f1174213c6872026a5bef Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:31:36 +0100 Subject: [PATCH 20/33] Don't use gpu2ranks_slurm_cuda on santis, use --gpus-per-task instead --- scripts/systems.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/systems.py b/scripts/systems.py index c87e79d760..12d24be57d 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -198,15 +198,13 @@ """, } -# NOTE: Here is assumed that `gpu2ranks_slurm_cuda` is in PATH! -# modify "Run command" if it is not the case. cscs["santis"] = { "Cores": 288, "Threads per core": 1, "Allowed rpns": [4], "Multiple rpn in same job": True, "GPU": True, - "Run command": "srun -u {srun_args} -n {total_ranks} --cpu-bind=core -c {threads_per_rank} gpu2ranks_slurm_cuda", + "Run command": "srun -u {srun_args} -n {total_ranks} --gpus-per-task=1 --cpu-bind=core -c {threads_per_rank}", "Launch command": "sbatch --chdir={job_path} {job_file}", "Batch preamble": """ #!/bin/bash -l From 23c1401d6968ec74924ad1c470e96170de9b5c4e Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:32:38 +0100 Subject: [PATCH 21/33] Remove outdated workaround for MPICH bug on santis --- scripts/systems.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/systems.py b/scripts/systems.py index 12d24be57d..a15fc37644 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -217,7 +217,6 @@ #SBATCH --no-requeue # Env -export MPICH_OPT_THREAD_SYNC=0 # Required to work around MPICH bug export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 From f0311aa86230130ca822109f9fe1e6ca7da220a9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:33:40 +0100 Subject: [PATCH 22/33] Add useful DLAF environment variables for santis --- scripts/systems.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/scripts/systems.py b/scripts/systems.py index a15fc37644..a226c369fc 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -219,6 +219,8 @@ # Env export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 +export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 +export DLAF_UMPIRE_DEVICE_MEMORY_POOL_ALIGNMENT_BYTES=$((1 << 21)) # Debug module list &> modules_{bs_name}.txt From fa551520dd75c29ab0cf4fd6ffafeea0146ef205 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:34:16 +0100 Subject: [PATCH 23/33] Add libfabric environment variable to avoid hangs on santis --- scripts/systems.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/systems.py b/scripts/systems.py index a226c369fc..8589590df7 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -217,6 +217,7 @@ #SBATCH --no-requeue # Env +export FI_MR_CACHE_MONITOR=disabled export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 From 22ff1362fd16a6fc18946e571f7cc01a70eac830 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:35:11 +0100 Subject: [PATCH 24/33] Update lumi-gpu environment variables --- scripts/systems.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/systems.py b/scripts/systems.py index 8589590df7..dc95f61dc8 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -293,9 +293,10 @@ #SBATCH --no-requeue # Env -export MPICH_MAX_THREAD_SAFETY=multiple +export FI_MR_CACHE_MONITOR=disabled export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 +export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 # Debug module list &> modules_{bs_name}.txt From 03b0cd5292675e51e17ccbd487f1201e776f4e49 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:35:48 +0100 Subject: [PATCH 25/33] Remove MPICH_MAX_THREAD_SAFETY environment variable from systems that don't need it --- scripts/systems.py | 4 ---- 1 file 
changed, 4 deletions(-) diff --git a/scripts/systems.py b/scripts/systems.py index dc95f61dc8..f38be4500b 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -117,7 +117,6 @@ #SBATCH --no-requeue # Env -export MPICH_MAX_THREAD_SAFETY=multiple export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 @@ -152,7 +151,6 @@ #SBATCH --no-requeue # Env -export MPICH_MAX_THREAD_SAFETY=multiple export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 @@ -186,7 +184,6 @@ #SBATCH --no-requeue # Env -export MPICH_MAX_THREAD_SAFETY=multiple export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 @@ -254,7 +251,6 @@ #SBATCH --no-requeue # Env -export MPICH_MAX_THREAD_SAFETY=multiple export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_LARGE_OS_PAGES=1 From 6b784803d7ede28f2c2a2bfb25b2611dc578ca02 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:36:47 +0100 Subject: [PATCH 26/33] Rename MIMALLOC_LARGE_OS_PAGES to MIMALLOC_ALLOW_LARGE_OS_PAGES --- scripts/systems.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/systems.py b/scripts/systems.py index f38be4500b..924882d94b 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -54,7 +54,7 @@ # Env export MPICH_MAX_THREAD_SAFETY=multiple export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 # Debug module list &> modules_{bs_name}.txt @@ -87,7 +87,7 @@ # Env export MPICH_MAX_THREAD_SAFETY=multiple export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 # Debug module list &> modules_{bs_name}.txt @@ -118,7 +118,7 @@ # Env export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 # Debug module list &> modules_{bs_name}.txt @@ -152,7 +152,7 @@ # Env export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export 
MIMALLOC_ALLOW_LARGE_OS_PAGES=1 # Debug module list &> modules_{bs_name}.txt @@ -185,7 +185,7 @@ # Env export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 # Debug module list &> modules_{bs_name}.txt @@ -216,7 +216,7 @@ # Env export FI_MR_CACHE_MONITOR=disabled export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 export DLAF_UMPIRE_DEVICE_MEMORY_POOL_ALIGNMENT_BYTES=$((1 << 21)) @@ -252,7 +252,7 @@ # Env export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 # Debug module list &> modules_{bs_name}.txt @@ -291,7 +291,7 @@ # Env export FI_MR_CACHE_MONITOR=disabled export MIMALLOC_EAGER_COMMIT_DELAY=0 -export MIMALLOC_LARGE_OS_PAGES=1 +export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 # Debug From 017ccaf9d4c4335c843f44f376b20d1787558e0c Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:38:41 +0100 Subject: [PATCH 27/33] Set band_to_tridiag 1d block size for lumi-gpu --- scripts/systems.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/systems.py b/scripts/systems.py index 924882d94b..f4e5f13008 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -293,6 +293,7 @@ export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 +export DLAF_BAND_TO_TRIDIAG_1D_BLOCK_SIZE_BASE=2048 # Debug module list &> modules_{bs_name}.txt From a75284f7e289494dc6fbdbc8c32d5452ec42d580 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Thu, 7 Nov 2024 13:39:38 +0100 Subject: [PATCH 28/33] Add comment for allocation alignment on santis --- scripts/systems.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/systems.py b/scripts/systems.py index f4e5f13008..9a3bb229a6 100644 --- 
a/scripts/systems.py +++ b/scripts/systems.py @@ -218,7 +218,7 @@ export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 -export DLAF_UMPIRE_DEVICE_MEMORY_POOL_ALIGNMENT_BYTES=$((1 << 21)) +export DLAF_UMPIRE_DEVICE_MEMORY_POOL_ALIGNMENT_BYTES=$((1 << 21)) # 2 MiB, large page size # Debug module list &> modules_{bs_name}.txt From ada03ea82a84373d695cfd1c46b57699e93f70c2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Wed, 20 Nov 2024 12:09:09 +0100 Subject: [PATCH 29/33] Add MPICH_GPU_SUPPORT_ENABLED=1 to clariden, santis, and lumi-gpu system configurations --- scripts/systems.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/systems.py b/scripts/systems.py index 9a3bb229a6..8d88bb471d 100644 --- a/scripts/systems.py +++ b/scripts/systems.py @@ -151,6 +151,7 @@ #SBATCH --no-requeue # Env +export MPICH_GPU_SUPPORT_ENABLED=1 export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 @@ -184,6 +185,7 @@ #SBATCH --no-requeue # Env +export MPICH_GPU_SUPPORT_ENABLED=1 export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 @@ -215,6 +217,7 @@ # Env export FI_MR_CACHE_MONITOR=disabled +export MPICH_GPU_SUPPORT_ENABLED=1 export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 @@ -290,6 +293,7 @@ # Env export FI_MR_CACHE_MONITOR=disabled +export MPICH_GPU_SUPPORT_ENABLED=1 export MIMALLOC_EAGER_COMMIT_DELAY=0 export MIMALLOC_ALLOW_LARGE_OS_PAGES=1 export DLAF_BT_BAND_TO_TRIDIAG_HH_APPLY_GROUP_SIZE=128 From 6f88fe2c0f2b12fb08f85aa1be061f2a23e054c9 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 22 Nov 2024 09:56:20 +0100 Subject: [PATCH 30/33] Remove --dlaf:bt-band-to-tridiag-hh-apply-group-size command line argument from benchmarking scripts extra_flags Prefer default, or set with environment variable. 
--- scripts/gen_dlaf_strong-gpu.py | 2 +- scripts/gen_dlaf_strong-mc.py | 2 +- scripts/gen_dlaf_weak-gpu.py | 2 +- scripts/gen_dlaf_weak-mc.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/gen_dlaf_strong-gpu.py b/scripts/gen_dlaf_strong-gpu.py index b7efbe43d2..2ac9097603 100755 --- a/scripts/gen_dlaf_strong-gpu.py +++ b/scripts/gen_dlaf_strong-gpu.py @@ -35,7 +35,7 @@ m_szs_z = [10240, 20480] mb_szs_z = 512 -extra_flags = "--dlaf:bt-band-to-tridiag-hh-apply-group-size=128" +extra_flags = "" parser = argparse.ArgumentParser(description="Run strong scaling benchmarks.") parser.add_argument( diff --git a/scripts/gen_dlaf_strong-mc.py b/scripts/gen_dlaf_strong-mc.py index 1d4471aab8..d02f15a1b6 100755 --- a/scripts/gen_dlaf_strong-mc.py +++ b/scripts/gen_dlaf_strong-mc.py @@ -35,7 +35,7 @@ m_szs_z = [10240, 20480] mb_szs_z = 512 -extra_flags = "--dlaf:bt-band-to-tridiag-hh-apply-group-size=64" +extra_flags = "" parser = argparse.ArgumentParser(description="Run strong scaling benchmarks.") parser.add_argument( diff --git a/scripts/gen_dlaf_weak-gpu.py b/scripts/gen_dlaf_weak-gpu.py index 10bebcc752..94820f0c67 100755 --- a/scripts/gen_dlaf_weak-gpu.py +++ b/scripts/gen_dlaf_weak-gpu.py @@ -36,7 +36,7 @@ m_szs_z = [10240, 20480] mb_szs_z = 512 -extra_flags = "--dlaf:bt-band-to-tridiag-hh-apply-group-size=128" +extra_flags = "" approx = 512 # the sizes used in weak scaling are chosen to be the nearest multiple of approx. diff --git a/scripts/gen_dlaf_weak-mc.py b/scripts/gen_dlaf_weak-mc.py index c96a6b512d..9692f7a292 100755 --- a/scripts/gen_dlaf_weak-mc.py +++ b/scripts/gen_dlaf_weak-mc.py @@ -36,7 +36,7 @@ m_szs_z = [10240, 20480] mb_szs_z = 512 -extra_flags = "--dlaf:bt-band-to-tridiag-hh-apply-group-size=64" +extra_flags = "" approx = 512 # the sizes used in weak scaling are chosen to be the nearest multiple of approx. 
From 3fae37f54fab51dbfb0f31204b860e55855e7013 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Mon, 25 Nov 2024 11:30:21 +0100 Subject: [PATCH 31/33] Fix stray quote in ctest_to_gitlab.sh --- ci/ctest_to_gitlab.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/ctest_to_gitlab.sh b/ci/ctest_to_gitlab.sh index 456dc87764..e0d12f91cd 100755 --- a/ci/ctest_to_gitlab.sh +++ b/ci/ctest_to_gitlab.sh @@ -102,7 +102,7 @@ JOB_TEMPLATE=" USE_MPI: 'YES' DISABLE_AFTER_SCRIPT: 'YES' DLAF_HDF5_TEST_OUTPUT_PATH: \$CI_PROJECT_DIR - script: mpi-ctest -L {{CATEGORY_LABEL}} -L {{RANK_LABEL}}" + script: mpi-ctest -L {{CATEGORY_LABEL}} -L {{RANK_LABEL}} artifacts: when: always paths: From b3a342a6805f750ac3167b7ea164b6c7a49f93a6 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 29 Nov 2024 16:02:01 +0100 Subject: [PATCH 32/33] Add a bit more synchronization to eigensolver miniapps --- miniapp/miniapp_eigensolver.cpp | 3 +++ miniapp/miniapp_gen_eigensolver.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index c12b259de2..dd9e5eec88 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -218,6 +218,9 @@ struct EigensolverMiniapp { checkEigensolver(comm_grid, opts.uplo, matrix_ref, eigenvalues_host.get(), eigenvectors_host.get()); } + + eigenvalues.waitLocalTiles(); + eigenvectors.waitLocalTiles(); } } }; diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index 5b96110894..479c751a48 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -246,6 +246,9 @@ struct GenEigensolverMiniapp { checkGenEigensolver(comm_grid, opts.uplo, matrix_a_ref, matrix_b_ref, eigenvalues_host.get(), eigenvectors_host.get()); } + + eigenvalues.waitLocalTiles(); + eigenvectors.waitLocalTiles(); } } }; From ce64504809bb3c3396065cd951e95093b13c3f1a Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 
29 Nov 2024 16:42:09 +0100 Subject: [PATCH 33/33] Add yet more synchronization to eigensolver miniapps --- miniapp/miniapp_eigensolver.cpp | 2 ++ miniapp/miniapp_gen_eigensolver.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index dd9e5eec88..f5606a665c 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -222,6 +222,8 @@ struct EigensolverMiniapp { eigenvalues.waitLocalTiles(); eigenvectors.waitLocalTiles(); } + + comm_grid.wait_all_communicators(); } }; diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index 479c751a48..94132c6235 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -250,6 +250,8 @@ struct GenEigensolverMiniapp { eigenvalues.waitLocalTiles(); eigenvectors.waitLocalTiles(); } + + comm_grid.wait_all_communicators(); } };