From c58674678c8cd98a47a2fdb267b4eddc3ed5a783 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 10:27:44 +0200 Subject: [PATCH 01/15] gen eig factorized --- include/dlaf/eigensolver/gen_eigensolver.h | 282 +++++++++++++++--- .../dlaf/eigensolver/gen_eigensolver/api.h | 7 +- .../dlaf/eigensolver/gen_eigensolver/impl.h | 13 +- 3 files changed, 261 insertions(+), 41 deletions(-) diff --git a/include/dlaf/eigensolver/gen_eigensolver.h b/include/dlaf/eigensolver/gen_eigensolver.h index 495018f7c3..110ed9d1d1 100644 --- a/include/dlaf/eigensolver/gen_eigensolver.h +++ b/include/dlaf/eigensolver/gen_eigensolver.h @@ -11,6 +11,7 @@ /// @file +#include "gen_eigensolver/api.h" #include #include @@ -22,6 +23,69 @@ namespace dlaf { +namespace eigensolver::internal{ + +template +void hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, + Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { + DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); + DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); + DLAF_ASSERT(matrix::local_matrix(eigenvalues), eigenvalues); + DLAF_ASSERT(matrix::local_matrix(eigenvectors), eigenvectors); + DLAF_ASSERT(matrix::square_size(mat_a), mat_a); + DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(matrix::square_size(mat_b), mat_b); + DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); + DLAF_ASSERT(matrix::square_size(eigenvectors), eigenvectors); + DLAF_ASSERT(matrix::square_blocksize(eigenvectors), eigenvectors); + DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); + DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); + DLAF_ASSERT(eigenvalues.size().rows() == eigenvectors.size().rows(), eigenvalues, eigenvectors); + DLAF_ASSERT(eigenvalues.blockSize().rows() == eigenvectors.blockSize().rows(), eigenvalues, + eigenvectors); + DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a); + DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a); + DLAF_ASSERT(matrix::single_tile_per_block(mat_a), mat_a); + DLAF_ASSERT(matrix::single_tile_per_block(mat_b), mat_b); + DLAF_ASSERT(matrix::single_tile_per_block(eigenvalues), eigenvalues); + DLAF_ASSERT(matrix::single_tile_per_block(eigenvectors), eigenvectors); + + eigensolver::internal::GenEigensolver::call(uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); +} + +template +void hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, blas::Uplo uplo, + Matrix& mat_a, Matrix& mat_b, + Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { + DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); + DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); + DLAF_ASSERT(matrix::local_matrix(eigenvalues), eigenvalues); + DLAF_ASSERT(matrix::equal_process_grid(eigenvectors, grid), eigenvectors, grid); + DLAF_ASSERT(matrix::square_size(mat_a), mat_a); + DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(matrix::square_size(mat_b), mat_b); + DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); + DLAF_ASSERT(matrix::square_size(eigenvectors), eigenvectors); + DLAF_ASSERT(matrix::square_blocksize(eigenvectors), eigenvectors); + DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); + DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); + DLAF_ASSERT(eigenvalues.size().rows() == eigenvectors.size().rows(), eigenvalues, eigenvectors); + DLAF_ASSERT(eigenvalues.blockSize().rows() == eigenvectors.blockSize().rows(), eigenvalues, + eigenvectors); + DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a); + DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a); + DLAF_ASSERT(matrix::single_tile_per_block(mat_a), mat_a); + DLAF_ASSERT(matrix::single_tile_per_block(mat_b), mat_b); + DLAF_ASSERT(matrix::single_tile_per_block(eigenvalues), eigenvalues); + DLAF_ASSERT(matrix::single_tile_per_block(eigenvectors), eigenvectors); + + eigensolver::internal::GenEigensolver::call(grid, uplo, mat_a, mat_b, eigenvalues, + eigenvectors, factorization); +} + + +} + /// Generalized Eigensolver. /// /// It solves the generalized eigenvalue problem A * x = lambda * B * x. @@ -63,29 +127,100 @@ namespace dlaf { template void hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { + eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, eigensolver::internal::Factorization::do_factorization); +} + +/// Generalized Eigensolver. +/// +/// It solves the generalized eigenvalue problem A * x = lambda * B * x. +/// +/// On exit: +/// - the lower triangle or the upper triangle (depending on @p uplo) of @p mat_a, +/// including the diagonal, is destroyed. +/// - @p mat_b contains the Cholesky decomposition of B +/// +/// Implementation on local memory. +/// +/// @return struct ReturnEigensolverType with eigenvalues, as a vector, and eigenvectors as a Matrix +/// @param uplo specifies if upper or lower triangular part of @p mat_a and @p mat_b will be referenced +/// +/// @param mat_a contains the Hermitian matrix A +/// @pre @p mat_a is not distributed +/// @pre @p mat_a has size (N x N) +/// @pre @p mat_a has blocksize (NB x NB) +/// @pre @p mat_a has tilesize (NB x NB) +/// +/// @param mat_b contains the Hermitian positive definite matrix B +/// @pre @p mat_b is not distributed +/// @pre @p mat_b has size (N x N) +/// @pre @p mat_b has blocksize (NB x NB) +/// @pre @p mat_b has tilesize (NB x NB) +template +EigensolverResult hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, + Matrix& mat_b) { DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); - DLAF_ASSERT(matrix::local_matrix(eigenvalues), eigenvalues); - DLAF_ASSERT(matrix::local_matrix(eigenvectors), eigenvectors); DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); DLAF_ASSERT(matrix::square_size(mat_b), mat_b); DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); - DLAF_ASSERT(matrix::square_size(eigenvectors), eigenvectors); - DLAF_ASSERT(matrix::square_blocksize(eigenvectors), eigenvectors); DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); - DLAF_ASSERT(eigenvalues.size().rows() == eigenvectors.size().rows(), eigenvalues, eigenvectors); - DLAF_ASSERT(eigenvalues.blockSize().rows() == eigenvectors.blockSize().rows(), eigenvalues, - eigenvectors); - DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a); - DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a); - DLAF_ASSERT(matrix::single_tile_per_block(mat_a), mat_a); - DLAF_ASSERT(matrix::single_tile_per_block(mat_b), mat_b); - DLAF_ASSERT(matrix::single_tile_per_block(eigenvalues), eigenvalues); - DLAF_ASSERT(matrix::single_tile_per_block(eigenvectors), eigenvectors); - eigensolver::internal::GenEigensolver::call(uplo, mat_a, mat_b, eigenvalues, eigenvectors); + const SizeType size = mat_a.size().rows(); + + matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), + TileElementSize(mat_a.blockSize().rows(), 1)); + matrix::Matrix eigenvectors(LocalElementSize(size, size), mat_a.blockSize()); + + hermitian_generalized_eigensolver(uplo, mat_a, mat_b, eigenvalues, eigenvectors); + + return {std::move(eigenvalues), std::move(eigenvectors)}; +} + +/// Generalized Eigensolver. +/// +/// It solves the generalized eigenvalue problem A * x = lambda * B * x. +/// +/// On exit: +/// - the lower triangle or the upper triangle (depending on @p uplo) of @p mat_a, +/// including the diagonal, is destroyed. +/// - @p mat_b contains the Cholesky decomposition of B +/// - @p eigenvalues contains all the eigenvalues lambda +/// - @p eigenvectors contains all the eigenvectors x +/// +/// Implementation on local memory. +/// +/// @param uplo specifies if upper or lower triangular part of @p mat_a and @p mat_b will be referenced +/// +/// @param mat_a contains the Hermitian matrix A +/// @pre @p mat_a is not distributed +/// @pre @p mat_a has size (N x N) +/// @pre @p mat_a has blocksize (NB x NB) +/// @pre @p mat_a has tilesize (NB x NB) +/// +/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B +/// @pre @p mat_b is not distributed +/// @pre @p mat_b has size (N x N) +/// @pre @p mat_b has blocksize (NB x NB) +/// @pre @p mat_b has tilesize (NB x NB) +/// @pre @p mat_b is the result of a Cholesky factorization +/// +/// @param[out] eigenvalues contains the eigenvalues +/// @pre @p eigenvalues is not distributed +/// @pre @p eigenvalues has size (N x 1) +/// @pre @p eigenvalues has blocksize (NB x NB) +/// @pre @p eigenvalues has tilesize (NB x NB) +/// +/// @param[out] eigenvectors contains the eigenvectors +/// @pre @p eigenvectors is not distributed +/// @pre @p eigenvectors has size (N x N) +/// @pre @p eigenvectors has blocksize (NB x NB) +/// @pre @p eigenvectors has tilesize (NB x NB) +template +void hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, + Matrix, D>& eigenvalues, Matrix& eigenvectors) { + eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, eigensolver::internal::Factorization::already_factorized); } /// Generalized Eigensolver. @@ -108,13 +243,14 @@ void hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Mat /// @pre @p mat_a has blocksize (NB x NB) /// @pre @p mat_a has tilesize (NB x NB) /// -/// @param mat_b contains the Hermitian positive definite matrix B +/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B /// @pre @p mat_b is not distributed /// @pre @p mat_b has size (N x N) /// @pre @p mat_b has blocksize (NB x NB) /// @pre @p mat_b has tilesize (NB x NB) +/// @pre @p mat_b is the result of a Cholesky factorization template -EigensolverResult hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, +EigensolverResult hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); @@ -131,7 +267,7 @@ EigensolverResult hermitian_generalized_eigensolver(blas::Uplo uplo, Matri TileElementSize(mat_a.blockSize().rows(), 1)); matrix::Matrix eigenvectors(LocalElementSize(size, size), mat_a.blockSize()); - hermitian_generalized_eigensolver(uplo, mat_a, mat_b, eigenvalues, eigenvectors); + hermitian_generalized_eigensolver_factorized(uplo, mat_a, mat_b, eigenvalues, eigenvectors); return {std::move(eigenvalues), std::move(eigenvectors)}; } @@ -179,30 +315,105 @@ template void hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { + eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, + eigenvectors, eigensolver::internal::Factorization::do_factorization); +} + +/// Generalized Eigensolver. +/// +/// It solves the generalized eigenvalue problem A * x = lambda * B * x. +/// +/// On exit: +/// - the lower triangle or the upper triangle (depending on @p uplo) of @p mat_a, +/// including the diagonal, is destroyed. +/// - @p mat_b contains the Cholesky decomposition of B +/// +/// Implementation on distributed memory. +/// +/// @return struct ReturnEigensolverType with eigenvalues, as a vector, and eigenvectors as a Matrix +/// @param grid is the communicator grid on which the matrices @p mat_a and @p mat_b have been distributed, +/// @param uplo specifies if upper or lower triangular part of @p mat_a and @p mat_b will be referenced +/// +/// @param mat_a contains the Hermitian matrix A +/// @pre @p mat_a is distributed according to @p grid +/// @pre @p mat_a has size (N x N) +/// @pre @p mat_a has blocksize (NB x NB) +/// @pre @p mat_a has tilesize (NB x NB) +/// +/// @param mat_b contains the Hermitian positive definite matrix B +/// @pre @p mat_b is distributed according to @p grid +/// @pre @p mat_b has size (N x N) +/// @pre @p mat_b has blocksize (NB x NB) +/// @pre @p mat_b has tilesize (NB x NB) +template +EigensolverResult hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo uplo, + Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); - DLAF_ASSERT(matrix::local_matrix(eigenvalues), eigenvalues); - DLAF_ASSERT(matrix::equal_process_grid(eigenvectors, grid), eigenvectors, grid); DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); DLAF_ASSERT(matrix::square_size(mat_b), mat_b); DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); - DLAF_ASSERT(matrix::square_size(eigenvectors), eigenvectors); - DLAF_ASSERT(matrix::square_blocksize(eigenvectors), eigenvectors); DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); - DLAF_ASSERT(eigenvalues.size().rows() == eigenvectors.size().rows(), eigenvalues, eigenvectors); - DLAF_ASSERT(eigenvalues.blockSize().rows() == eigenvectors.blockSize().rows(), eigenvalues, - eigenvectors); - DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a); - DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a); - DLAF_ASSERT(matrix::single_tile_per_block(mat_a), mat_a); - DLAF_ASSERT(matrix::single_tile_per_block(mat_b), mat_b); - DLAF_ASSERT(matrix::single_tile_per_block(eigenvalues), eigenvalues); - DLAF_ASSERT(matrix::single_tile_per_block(eigenvectors), eigenvectors); - eigensolver::internal::GenEigensolver::call(grid, uplo, mat_a, mat_b, eigenvalues, - eigenvectors); + const SizeType size = mat_a.size().rows(); + + matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), + TileElementSize(mat_a.blockSize().rows(), 1)); + matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); + + hermitian_generalized_eigensolver(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors); + + return {std::move(eigenvalues), std::move(eigenvectors)}; +} + +/// Generalized Eigensolver. +/// +/// It solves the generalized eigenvalue problem A * x = lambda * B * x. +/// +/// On exit: +/// - the lower triangle or the upper triangle (depending on @p uplo) of @p mat_a, +/// including the diagonal, is destroyed. +/// - @p mat_b contains the Cholesky decomposition of B +/// - @p eigenvalues contains all the eigenvalues lambda +/// - @p eigenvectors contains all the eigenvectors x +/// +/// Implementation on distributed memory. +/// +/// @param grid is the communicator grid on which the matrices @p mat_a and @p mat_b have been distributed, +/// @param uplo specifies if upper or lower triangular part of @p mat_a and @p mat_b will be referenced +/// +/// @param mat_a contains the Hermitian matrix A +/// @pre @p mat_a is distributed according to @p grid +/// @pre @p mat_a has size (N x N) +/// @pre @p mat_a has blocksize (NB x NB) +/// @pre @p mat_a has tilesize (NB x NB) +/// +/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B +/// @pre @p mat_b is distributed according to @p grid +/// @pre @p mat_b has size (N x N) +/// @pre @p mat_b has blocksize (NB x NB) +/// @pre @p mat_b has tilesize (NB x NB) +/// @pre @p mat_b is the result of a Cholesky factorization +/// +/// @param eigenvalues is a N x 1 matrix which on output contains the eigenvalues +/// @pre @p eigenvalues is not distributed +/// @pre @p eigenvalues has size (N x 1) +/// @pre @p eigenvalues has blocksize (NB x 1) +/// @pre @p eigenvalues has tilesize (NB x 1) +/// +/// @param[out] eigenvectors contains the eigenvectors +/// @pre @p eigenvectors is distributed according to @p grid +/// @pre @p eigenvectors has size (N x N) +/// @pre @p eigenvectors has blocksize (NB x NB) +/// @pre @p eigenvectors has tilesize (NB x NB) +template +void hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, blas::Uplo uplo, + Matrix& mat_a, Matrix& mat_b, + Matrix, D>& eigenvalues, Matrix& eigenvectors) { + eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, + eigenvectors, eigensolver::internal::Factorization::already_factorized); } /// Generalized Eigensolver. @@ -226,13 +437,14 @@ void hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo /// @pre @p mat_a has blocksize (NB x NB) /// @pre @p mat_a has tilesize (NB x NB) /// -/// @param mat_b contains the Hermitian positive definite matrix B +/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B /// @pre @p mat_b is distributed according to @p grid /// @pre @p mat_b has size (N x N) /// @pre @p mat_b has blocksize (NB x NB) /// @pre @p mat_b has tilesize (NB x NB) +/// @pre @p mat_b is the result of a Cholesky factorization template -EigensolverResult hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo uplo, +EigensolverResult hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); @@ -249,7 +461,7 @@ EigensolverResult hermitian_generalized_eigensolver(comm::CommunicatorGrid TileElementSize(mat_a.blockSize().rows(), 1)); matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); - hermitian_generalized_eigensolver(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors); + hermitian_generalized_eigensolver_factorized(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors); return {std::move(eigenvalues), std::move(eigenvectors)}; } diff --git a/include/dlaf/eigensolver/gen_eigensolver/api.h b/include/dlaf/eigensolver/gen_eigensolver/api.h index 3e09641dc8..0023e21e54 100644 --- a/include/dlaf/eigensolver/gen_eigensolver/api.h +++ b/include/dlaf/eigensolver/gen_eigensolver/api.h @@ -15,13 +15,16 @@ #include namespace dlaf::eigensolver::internal { + +enum class Factorization {do_factorization, already_factorized}; + template struct GenEigensolver { static void call(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, - Matrix, device>& eigenvalues, Matrix& eigenvectors); + Matrix, device>& eigenvalues, Matrix& eigenvectors, const Factorization factorization); static void call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, device>& eigenvalues, - Matrix& eigenvectors); + Matrix& eigenvectors, const Factorization factorizatio); }; // ETI diff --git a/include/dlaf/eigensolver/gen_eigensolver/impl.h b/include/dlaf/eigensolver/gen_eigensolver/impl.h index d3f801c999..6a6d295d81 100644 --- a/include/dlaf/eigensolver/gen_eigensolver/impl.h +++ b/include/dlaf/eigensolver/gen_eigensolver/impl.h @@ -9,6 +9,7 @@ // #pragma once +#include "api.h" #include #include #include @@ -21,8 +22,10 @@ namespace dlaf::eigensolver::internal { template void GenEigensolver::call(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, - Matrix, D>& eigenvalues, Matrix& eigenvectors) { - cholesky_factorization(uplo, mat_b); + Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { + if(factorization == Factorization::do_factorization){ + cholesky_factorization(uplo, mat_b); + } generalized_to_standard(uplo, mat_a, mat_b); hermitian_eigensolver(uplo, mat_a, eigenvalues, eigenvectors); @@ -34,8 +37,10 @@ void GenEigensolver::call(blas::Uplo uplo, Matrix& mat_a, Matrix< template void GenEigensolver::call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, - Matrix& eigenvectors) { - cholesky_factorization(grid, uplo, mat_b); + Matrix& eigenvectors, const Factorization factorization) { + if(factorization == Factorization::do_factorization){ + cholesky_factorization(grid, uplo, mat_b); + } generalized_to_standard(grid, uplo, mat_a, mat_b); hermitian_eigensolver(grid, uplo, mat_a, eigenvalues, eigenvectors); From ec2860bb4204a3f1a3655a54a48c8d749ed94b77 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 13:31:12 +0200 Subject: [PATCH 02/15] tests --- .../unit/eigensolver/test_gen_eigensolver.cpp | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/test/unit/eigensolver/test_gen_eigensolver.cpp b/test/unit/eigensolver/test_gen_eigensolver.cpp index e7df53fcb0..08ba280bbc 100644 --- a/test/unit/eigensolver/test_gen_eigensolver.cpp +++ b/test/unit/eigensolver/test_gen_eigensolver.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -58,6 +59,7 @@ TYPED_TEST_SUITE(GenEigensolverTestGPU, MatrixElementTypes); #endif enum class Allocation { do_allocation, use_preallocated }; +using dlaf::eigensolver::internal::Factorization; const std::vector blas_uplos({blas::Uplo::Lower}); @@ -69,7 +71,7 @@ const std::vector> sizes = { {34, 8, 3}, {32, 6, 3} // m > mb, sub-band }; -template +template void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType mb, GridIfDistributed&... grid) { constexpr bool isDistributed = (sizeof...(grid) == 1); @@ -105,10 +107,22 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType MatrixMirror mat_b(mat_b_h); if constexpr (allocation == Allocation::do_allocation) { if constexpr (isDistributed) { - return hermitian_generalized_eigensolver(grid..., uplo, mat_a.get(), mat_b.get()); + if constexpr (factorization == Factorization::do_factorization){ + return hermitian_generalized_eigensolver(grid..., uplo, mat_a.get(), mat_b.get()); + } + else{ + cholesky_factorization(grid..., uplo, mat_b.get()); + return hermitian_generalized_eigensolver_factorized(grid..., uplo, mat_a.get(), mat_b.get()); + } } else { - return hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get()); + if constexpr (factorization == Factorization::do_factorization){ + return hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get()); + } + else{ + cholesky_factorization(uplo, mat_b.get()); + return hermitian_generalized_eigensolver_factorized(uplo, mat_a.get(), mat_b.get()); + } } } else if constexpr (allocation == Allocation::use_preallocated) { @@ -117,13 +131,27 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType TileElementSize(mat_a_h.blockSize().rows(), 1)); if constexpr (isDistributed) { Matrix eigenvectors(GlobalElementSize(size, size), mat_a_h.blockSize(), grid...); + if constexpr (factorization == Factorization::do_factorization){ hermitian_generalized_eigensolver(grid..., uplo, mat_a.get(), mat_b.get(), eigenvalues, eigenvectors); + } + else{ + cholesky_factorization(grid..., uplo, mat_b.get()); + hermitian_generalized_eigensolver_factorized(grid..., uplo, mat_a.get(), mat_b.get(), eigenvalues, + eigenvectors); + + } return EigensolverResult{std::move(eigenvalues), std::move(eigenvectors)}; } else { Matrix eigenvectors(LocalElementSize(size, size), mat_a_h.blockSize()); - hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get(), eigenvalues, eigenvectors); + if constexpr (factorization == Factorization::do_factorization){ + hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get(), eigenvalues, eigenvectors); + } + else{ + cholesky_factorization(uplo, mat_b.get()); + hermitian_generalized_eigensolver_factorized(uplo, mat_a.get(), mat_b.get(), eigenvalues, eigenvectors); + } return EigensolverResult{std::move(eigenvalues), std::move(eigenvectors)}; } } @@ -139,8 +167,10 @@ TYPED_TEST(GenEigensolverTestMC, CorrectnessLocal) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); } } } @@ -150,10 +180,10 @@ TYPED_TEST(GenEigensolverTestMC, CorrectnessDistributed) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb, - grid); - testGenEigensolver(uplo, m, - mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } @@ -164,9 +194,10 @@ TYPED_TEST(GenEigensolverTestGPU, CorrectnessLocal) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, - mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); } } } @@ -176,10 +207,10 @@ TYPED_TEST(GenEigensolverTestGPU, CorrectnessDistributed) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb, - grid); - testGenEigensolver(uplo, m, - mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } From 962a037f097473c78f3aa5161e8540cd38b3b0c6 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 13:31:21 +0200 Subject: [PATCH 03/15] c api --- src/c_api/eigensolver/gen_eigensolver.cpp | 63 +++++++++++++++++++++++ src/c_api/eigensolver/gen_eigensolver.h | 59 ++++++++++++++++++--- 2 files changed, 114 insertions(+), 8 deletions(-) diff --git a/src/c_api/eigensolver/gen_eigensolver.cpp b/src/c_api/eigensolver/gen_eigensolver.cpp index c8900e4aee..e0c6825de4 100644 --- a/src/c_api/eigensolver/gen_eigensolver.cpp +++ b/src/c_api/eigensolver/gen_eigensolver.cpp @@ -52,6 +52,41 @@ int dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const char dlaf_descb, w, z, dlaf_descz); } +int dlaf_symmetric_generalized_eigensolver_factorized_s(const int dlaf_context, const char uplo, float* a, + const struct DLAF_descriptor dlaf_desca, float* b, + const struct DLAF_descriptor dlaf_descb, float* w, float* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z, + dlaf_descz); +} + +int dlaf_symmetric_generalized_eigensolver_factorized_d(const int dlaf_context, const char uplo, double* a, + const struct DLAF_descriptor dlaf_desca, double* b, + const struct DLAF_descriptor dlaf_descb, double* w, + double* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, + z, dlaf_descz); +} + +int dlaf_hermitian_generalized_eigensolver_factorized_c(const int dlaf_context, const char uplo, dlaf_complex_c* a, + const struct DLAF_descriptor dlaf_desca, dlaf_complex_c* b, + const struct DLAF_descriptor dlaf_descb, float* w, + dlaf_complex_c* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized>(dlaf_context, uplo, a, dlaf_desca, b, + dlaf_descb, w, z, dlaf_descz); +} + +int dlaf_hermitian_generalized_eigensolver_factorized_z(const int dlaf_context, const char uplo, dlaf_complex_z* a, + const struct DLAF_descriptor dlaf_desca, dlaf_complex_z* b, + const struct DLAF_descriptor dlaf_descb, double* w, + dlaf_complex_z* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized>(dlaf_context, uplo, a, dlaf_desca, b, + dlaf_descb, w, z, dlaf_descz); +} + #ifdef DLAF_WITH_SCALAPACK void dlaf_pssygvx(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], @@ -81,4 +116,32 @@ void dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, pxhegvx>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } +void dlaf_pssygvx_factorized(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], + float* b, const int ib, const int jb, const int descb[9], float* w, float* z, + const int iz, const int jz, const int descz[9], int* info) noexcept { + pxhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); +} + +void dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, const int ja, + const int desca[9], double* b, const int ib, const int jb, const int descb[9], + double* w, double* z, const int iz, const int jz, const int descz[9], + int* info) noexcept { + pxhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); +} + +void dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, + const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], + float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], + int* info) noexcept { + pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); +} + +void dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, + const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], + double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], + int* info) noexcept { + pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); +} + + #endif diff --git a/src/c_api/eigensolver/gen_eigensolver.h b/src/c_api/eigensolver/gen_eigensolver.h index dee96655b0..c16d3e7e2b 100644 --- a/src/c_api/eigensolver/gen_eigensolver.h +++ b/src/c_api/eigensolver/gen_eigensolver.h @@ -28,10 +28,11 @@ #include "../utils.h" template -int hermitian_generalized_eigensolver(const int dlaf_context, const char uplo, T* a, +int hermitian_generalized_eigensolver_helper(const int dlaf_context, const char uplo, T* a, const DLAF_descriptor dlaf_desca, T* b, const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, T* z, - const DLAF_descriptor dlaf_descz) { + const DLAF_descriptor dlaf_descz, + bool factorized) { using MatrixHost = dlaf::matrix::Matrix; using MatrixMirror = dlaf::matrix::MatrixMirror; using MatrixBaseMirror = @@ -64,10 +65,17 @@ int hermitian_generalized_eigensolver(const int dlaf_context, const char uplo, T MatrixMirror matrix_b(matrix_host_b); MatrixMirror eigenvectors(eigenvectors_host); MatrixBaseMirror eigenvalues(eigenvalues_host); - + + if(!factorized){ dlaf::hermitian_generalized_eigensolver( communicator_grid, blas::char2uplo(uplo), matrix_a.get(), matrix_b.get(), eigenvalues.get(), eigenvectors.get()); + } + else{ + dlaf::hermitian_generalized_eigensolver_factorized( + communicator_grid, blas::char2uplo(uplo), matrix_a.get(), matrix_b.get(), eigenvalues.get(), + eigenvectors.get()); + } } // Destroy mirror // Ensure data is copied back to the host @@ -78,12 +86,30 @@ int hermitian_generalized_eigensolver(const int dlaf_context, const char uplo, T return 0; } +template +int hermitian_generalized_eigensolver(const int dlaf_context, const char uplo, T* a, + const DLAF_descriptor dlaf_desca, T* b, + const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, T* z, + const DLAF_descriptor dlaf_descz) { + return + hermitian_generalized_eigensolver_helper(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz, false); +} + +template +int hermitian_generalized_eigensolver_factorized(const int dlaf_context, const char uplo, T* a, + const DLAF_descriptor dlaf_desca, T* b, + const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, T* z, + const DLAF_descriptor dlaf_descz) { + return + hermitian_generalized_eigensolver_helper(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz, true); +} + #ifdef DLAF_WITH_SCALAPACK template -void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, +void pxhegvx_helper(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, - int jz, const int descz[9], int& info) { + int jz, const int descz[9], int& info, bool factorized) { DLAF_ASSERT(desca[0] == 1, desca[0]); DLAF_ASSERT(descb[0] == 1, descb[0]); DLAF_ASSERT(descz[0] == 1, descz[0]); @@ -99,10 +125,27 @@ void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, con auto dlaf_desca = make_dlaf_descriptor(m, m, ia, ja, desca); auto dlaf_descb = make_dlaf_descriptor(m, m, ib, jb, descb); auto dlaf_descz = make_dlaf_descriptor(m, m, iz, jz, descz); + + if(!factorized){ + info = hermitian_generalized_eigensolver(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz); + } + else{ + info = hermitian_generalized_eigensolver_factorized(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz); + } +} - auto _info = - hermitian_generalized_eigensolver(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz); - info = _info; +template +void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, + const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, + int jz, const int descz[9], int& info) { + pxhegvx_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, false); +} + +template +void pxhegvx_factorized(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, + const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, + int jz, const int descz[9], int& info) { + pxhegvx_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, true); } #endif From 4c5ebe1ccd220773e9e4e20c39866bb616479fe2 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 13:32:19 +0200 Subject: [PATCH 04/15] formatting --- include/dlaf/eigensolver/gen_eigensolver.h | 60 ++++++++----- .../dlaf/eigensolver/gen_eigensolver/api.h | 5 +- .../dlaf/eigensolver/gen_eigensolver/impl.h | 10 ++- src/c_api/eigensolver/gen_eigensolver.cpp | 85 ++++++++++--------- src/c_api/eigensolver/gen_eigensolver.h | 65 +++++++------- .../unit/eigensolver/test_gen_eigensolver.cpp | 85 ++++++++++++------- 6 files changed, 175 insertions(+), 135 deletions(-) diff --git a/include/dlaf/eigensolver/gen_eigensolver.h b/include/dlaf/eigensolver/gen_eigensolver.h index 110ed9d1d1..0ba4f8032d 100644 --- a/include/dlaf/eigensolver/gen_eigensolver.h +++ b/include/dlaf/eigensolver/gen_eigensolver.h @@ -11,7 +11,6 @@ /// @file -#include "gen_eigensolver/api.h" #include #include @@ -21,13 +20,17 @@ #include #include +#include "gen_eigensolver/api.h" + namespace dlaf { -namespace eigensolver::internal{ +namespace eigensolver::internal { template void hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, - Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { + Matrix, D>& eigenvalues, + Matrix& eigenvectors, + const Factorization factorization) { DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); DLAF_ASSERT(matrix::local_matrix(eigenvalues), eigenvalues); @@ -50,13 +53,14 @@ void hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat DLAF_ASSERT(matrix::single_tile_per_block(eigenvalues), eigenvalues); DLAF_ASSERT(matrix::single_tile_per_block(eigenvectors), eigenvectors); - eigensolver::internal::GenEigensolver::call(uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); + eigensolver::internal::GenEigensolver::call(uplo, mat_a, mat_b, eigenvalues, eigenvectors, + factorization); } template -void hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, blas::Uplo uplo, - Matrix& mat_a, Matrix& mat_b, - Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { +void hermitian_generalized_eigensolver_helper( + comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, + Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); DLAF_ASSERT(matrix::local_matrix(eigenvalues), eigenvalues); @@ -82,7 +86,6 @@ void hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, blas eigensolver::internal::GenEigensolver::call(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); } - } @@ -127,7 +130,9 @@ void hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, blas template void hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { - eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, eigensolver::internal::Factorization::do_factorization); + eigensolver::internal::hermitian_generalized_eigensolver_helper( + uplo, mat_a, mat_b, eigenvalues, eigenvectors, + eigensolver::internal::Factorization::do_factorization); } /// Generalized Eigensolver. @@ -218,9 +223,13 @@ EigensolverResult hermitian_generalized_eigensolver(blas::Uplo uplo, Matri /// @pre @p eigenvectors has blocksize (NB x NB) /// @pre @p eigenvectors has tilesize (NB x NB) template -void hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, - Matrix, D>& eigenvalues, Matrix& eigenvectors) { - eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, eigensolver::internal::Factorization::already_factorized); +void hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& mat_a, + Matrix& mat_b, + Matrix, D>& eigenvalues, + Matrix& eigenvectors) { + eigensolver::internal::hermitian_generalized_eigensolver_helper( + uplo, mat_a, mat_b, eigenvalues, eigenvectors, + eigensolver::internal::Factorization::already_factorized); } /// Generalized Eigensolver. @@ -250,8 +259,9 @@ void hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& /// @pre @p mat_b has tilesize (NB x NB) /// @pre @p mat_b is the result of a Cholesky factorization template -EigensolverResult hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& mat_a, - Matrix& mat_b) { +EigensolverResult hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, + Matrix& mat_a, + Matrix& mat_b) { DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); DLAF_ASSERT(matrix::square_size(mat_a), mat_a); @@ -315,8 +325,9 @@ template void hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { - eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, - eigenvectors, eigensolver::internal::Factorization::do_factorization); + eigensolver::internal::hermitian_generalized_eigensolver_helper( + grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, + eigensolver::internal::Factorization::do_factorization); } /// Generalized Eigensolver. @@ -410,10 +421,12 @@ EigensolverResult hermitian_generalized_eigensolver(comm::CommunicatorGrid /// @pre @p eigenvectors has tilesize (NB x NB) template void hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, blas::Uplo uplo, - Matrix& mat_a, Matrix& mat_b, - Matrix, D>& eigenvalues, Matrix& eigenvectors) { - eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, - eigenvectors, eigensolver::internal::Factorization::already_factorized); + Matrix& mat_a, Matrix& mat_b, + Matrix, D>& eigenvalues, + Matrix& eigenvectors) { + eigensolver::internal::hermitian_generalized_eigensolver_helper( + grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, + eigensolver::internal::Factorization::already_factorized); } /// Generalized Eigensolver. @@ -444,8 +457,8 @@ void hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, /// @pre @p mat_b has tilesize (NB x NB) /// @pre @p mat_b is the result of a Cholesky factorization template -EigensolverResult hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, blas::Uplo uplo, - Matrix& mat_a, Matrix& mat_b) { +EigensolverResult hermitian_generalized_eigensolver_factorized( + comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); DLAF_ASSERT(matrix::square_size(mat_a), mat_a); @@ -461,7 +474,8 @@ EigensolverResult hermitian_generalized_eigensolver_factorized(comm::Commu TileElementSize(mat_a.blockSize().rows(), 1)); matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); - hermitian_generalized_eigensolver_factorized(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors); + hermitian_generalized_eigensolver_factorized(grid, uplo, mat_a, mat_b, eigenvalues, + eigenvectors); return {std::move(eigenvalues), std::move(eigenvectors)}; } diff --git a/include/dlaf/eigensolver/gen_eigensolver/api.h b/include/dlaf/eigensolver/gen_eigensolver/api.h index 0023e21e54..648d6d58e0 100644 --- a/include/dlaf/eigensolver/gen_eigensolver/api.h +++ b/include/dlaf/eigensolver/gen_eigensolver/api.h @@ -16,12 +16,13 @@ namespace dlaf::eigensolver::internal { -enum class Factorization {do_factorization, already_factorized}; +enum class Factorization { do_factorization, already_factorized }; template struct GenEigensolver { static void call(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, - Matrix, device>& eigenvalues, Matrix& eigenvectors, const Factorization factorization); + Matrix, device>& eigenvalues, Matrix& eigenvectors, + const Factorization factorization); static void call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, device>& eigenvalues, Matrix& eigenvectors, const Factorization factorizatio); diff --git a/include/dlaf/eigensolver/gen_eigensolver/impl.h b/include/dlaf/eigensolver/gen_eigensolver/impl.h index 6a6d295d81..2452b1aa50 100644 --- a/include/dlaf/eigensolver/gen_eigensolver/impl.h +++ b/include/dlaf/eigensolver/gen_eigensolver/impl.h @@ -9,7 +9,6 @@ // #pragma once -#include "api.h" #include #include #include @@ -18,12 +17,15 @@ #include #include +#include "api.h" + namespace dlaf::eigensolver::internal { template void GenEigensolver::call(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, - Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { - if(factorization == Factorization::do_factorization){ + Matrix, D>& eigenvalues, Matrix& eigenvectors, + const Factorization factorization) { + if (factorization == Factorization::do_factorization) { cholesky_factorization(uplo, mat_b); } generalized_to_standard(uplo, mat_a, mat_b); @@ -38,7 +40,7 @@ template void GenEigensolver::call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors, const Factorization factorization) { - if(factorization == Factorization::do_factorization){ + if (factorization == Factorization::do_factorization) { cholesky_factorization(grid, uplo, mat_b); } generalized_to_standard(grid, uplo, mat_a, mat_b); diff --git a/src/c_api/eigensolver/gen_eigensolver.cpp b/src/c_api/eigensolver/gen_eigensolver.cpp index e0c6825de4..1bed9a8396 100644 --- a/src/c_api/eigensolver/gen_eigensolver.cpp +++ b/src/c_api/eigensolver/gen_eigensolver.cpp @@ -52,39 +52,38 @@ int dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const char dlaf_descb, w, z, dlaf_descz); } -int dlaf_symmetric_generalized_eigensolver_factorized_s(const int dlaf_context, const char uplo, float* a, - const struct DLAF_descriptor dlaf_desca, float* b, - const struct DLAF_descriptor dlaf_descb, float* w, float* z, - const struct DLAF_descriptor dlaf_descz) noexcept { - return hermitian_generalized_eigensolver_factorized(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z, - dlaf_descz); +int dlaf_symmetric_generalized_eigensolver_factorized_s( + const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b, + const struct DLAF_descriptor dlaf_descb, float* w, float* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized(dlaf_context, uplo, a, dlaf_desca, b, + dlaf_descb, w, z, dlaf_descz); } -int dlaf_symmetric_generalized_eigensolver_factorized_d(const int dlaf_context, const char uplo, double* a, - const struct DLAF_descriptor dlaf_desca, double* b, - const struct DLAF_descriptor dlaf_descb, double* w, - double* z, - const struct DLAF_descriptor dlaf_descz) noexcept { - return hermitian_generalized_eigensolver_factorized(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, - z, dlaf_descz); +int dlaf_symmetric_generalized_eigensolver_factorized_d( + const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca, + double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized(dlaf_context, uplo, a, dlaf_desca, b, + dlaf_descb, w, z, dlaf_descz); } -int dlaf_hermitian_generalized_eigensolver_factorized_c(const int dlaf_context, const char uplo, dlaf_complex_c* a, - const struct DLAF_descriptor dlaf_desca, dlaf_complex_c* b, - const struct DLAF_descriptor dlaf_descb, float* w, - dlaf_complex_c* z, - const struct DLAF_descriptor dlaf_descz) noexcept { - return hermitian_generalized_eigensolver_factorized>(dlaf_context, uplo, a, dlaf_desca, b, - dlaf_descb, w, z, dlaf_descz); +int dlaf_hermitian_generalized_eigensolver_factorized_c( + const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca, + dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized>(dlaf_context, uplo, a, + dlaf_desca, b, dlaf_descb, w, + z, dlaf_descz); } -int dlaf_hermitian_generalized_eigensolver_factorized_z(const int dlaf_context, const char uplo, dlaf_complex_z* a, - const struct DLAF_descriptor dlaf_desca, dlaf_complex_z* b, - const struct DLAF_descriptor dlaf_descb, double* w, - dlaf_complex_z* z, - const struct DLAF_descriptor dlaf_descz) noexcept { - return hermitian_generalized_eigensolver_factorized>(dlaf_context, uplo, a, dlaf_desca, b, - dlaf_descb, w, z, dlaf_descz); +int dlaf_hermitian_generalized_eigensolver_factorized_z( + const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca, + dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z, + const struct DLAF_descriptor dlaf_descz) noexcept { + return hermitian_generalized_eigensolver_factorized>(dlaf_context, uplo, a, + dlaf_desca, b, dlaf_descb, w, + z, dlaf_descz); } #ifdef DLAF_WITH_SCALAPACK @@ -116,32 +115,34 @@ void dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, pxhegvx>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pssygvx_factorized(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], - float* b, const int ib, const int jb, const int descb[9], float* w, float* z, - const int iz, const int jz, const int descz[9], int* info) noexcept { +void dlaf_pssygvx_factorized(const char uplo, const int m, float* a, const int ia, const int ja, + const int desca[9], float* b, const int ib, const int jb, + const int descb[9], float* w, float* z, const int iz, const int jz, + const int descz[9], int* info) noexcept { pxhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } void dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, const int ja, - const int desca[9], double* b, const int ib, const int jb, const int descb[9], - double* w, double* z, const int iz, const int jz, const int descz[9], - int* info) noexcept { + const int desca[9], double* b, const int ib, const int jb, + const int descb[9], double* w, double* z, const int iz, const int jz, + const int descz[9], int* info) noexcept { pxhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } void dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, - const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], - float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], - int* info) noexcept { - pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + const int desca[9], dlaf_complex_c* b, const int ib, const int jb, + const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, + const int descz[9], int* info) noexcept { + pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, + descz, *info); } void dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, - const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], - double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], - int* info) noexcept { - pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + const int desca[9], dlaf_complex_z* b, const int ib, const int jb, + const int descb[9], double* w, dlaf_complex_z* z, const int iz, + const int jz, const int descz[9], int* info) noexcept { + pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, + descz, *info); } - #endif diff --git a/src/c_api/eigensolver/gen_eigensolver.h b/src/c_api/eigensolver/gen_eigensolver.h index c16d3e7e2b..855320c60e 100644 --- a/src/c_api/eigensolver/gen_eigensolver.h +++ b/src/c_api/eigensolver/gen_eigensolver.h @@ -29,10 +29,9 @@ template int hermitian_generalized_eigensolver_helper(const int dlaf_context, const char uplo, T* a, - const DLAF_descriptor dlaf_desca, T* b, - const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, T* z, - const DLAF_descriptor dlaf_descz, - bool factorized) { + const DLAF_descriptor dlaf_desca, T* b, + const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, + T* z, const DLAF_descriptor dlaf_descz, bool factorized) { using MatrixHost = dlaf::matrix::Matrix; using MatrixMirror = dlaf::matrix::MatrixMirror; using MatrixBaseMirror = @@ -65,16 +64,17 @@ int hermitian_generalized_eigensolver_helper(const int dlaf_context, const char MatrixMirror matrix_b(matrix_host_b); MatrixMirror eigenvectors(eigenvectors_host); MatrixBaseMirror eigenvalues(eigenvalues_host); - - if(!factorized){ - dlaf::hermitian_generalized_eigensolver( - communicator_grid, blas::char2uplo(uplo), matrix_a.get(), matrix_b.get(), eigenvalues.get(), - eigenvectors.get()); + + if (!factorized) { + dlaf::hermitian_generalized_eigensolver( + communicator_grid, blas::char2uplo(uplo), matrix_a.get(), matrix_b.get(), eigenvalues.get(), + eigenvectors.get()); } - else{ - dlaf::hermitian_generalized_eigensolver_factorized( - communicator_grid, blas::char2uplo(uplo), matrix_a.get(), matrix_b.get(), eigenvalues.get(), - eigenvectors.get()); + else { + dlaf::hermitian_generalized_eigensolver_factorized(communicator_grid, blas::char2uplo(uplo), + matrix_a.get(), matrix_b.get(), + eigenvalues.get(), eigenvectors.get()); } } // Destroy mirror @@ -91,25 +91,25 @@ int hermitian_generalized_eigensolver(const int dlaf_context, const char uplo, T const DLAF_descriptor dlaf_desca, T* b, const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, T* z, const DLAF_descriptor dlaf_descz) { - return - hermitian_generalized_eigensolver_helper(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz, false); + return hermitian_generalized_eigensolver_helper(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, + z, dlaf_descz, false); } template int hermitian_generalized_eigensolver_factorized(const int dlaf_context, const char uplo, T* a, - const DLAF_descriptor dlaf_desca, T* b, - const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, T* z, - const DLAF_descriptor dlaf_descz) { - return - hermitian_generalized_eigensolver_helper(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz, true); + const DLAF_descriptor dlaf_desca, T* b, + const DLAF_descriptor dlaf_descb, dlaf::BaseType* w, + T* z, const DLAF_descriptor dlaf_descz) { + return hermitian_generalized_eigensolver_helper(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, + z, dlaf_descz, true); } #ifdef DLAF_WITH_SCALAPACK template -void pxhegvx_helper(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, - const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, - int jz, const int descz[9], int& info, bool factorized) { +void pxhegvx_helper(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], + T* b, const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, + const int iz, int jz, const int descz[9], int& info, bool factorized) { DLAF_ASSERT(desca[0] == 1, desca[0]); DLAF_ASSERT(descb[0] == 1, descb[0]); DLAF_ASSERT(descz[0] == 1, descz[0]); @@ -125,12 +125,14 @@ void pxhegvx_helper(const char uplo, const int m, T* a, const int ia, const int auto dlaf_desca = make_dlaf_descriptor(m, m, ia, ja, desca); auto dlaf_descb = make_dlaf_descriptor(m, m, ib, jb, descb); auto dlaf_descz = make_dlaf_descriptor(m, m, iz, jz, descz); - - if(!factorized){ - info = hermitian_generalized_eigensolver(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz); + + if (!factorized) { + info = hermitian_generalized_eigensolver(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, + dlaf_descz); } - else{ - info = hermitian_generalized_eigensolver_factorized(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz); + else { + info = hermitian_generalized_eigensolver_factorized(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, + w, z, dlaf_descz); } } @@ -142,9 +144,10 @@ void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, con } template -void pxhegvx_factorized(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, - const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, - int jz, const int descz[9], int& info) { +void pxhegvx_factorized(const char uplo, const int m, T* a, const int ia, const int ja, + const int desca[9], T* b, const int ib, const int jb, const int descb[9], + dlaf::BaseType* w, T* z, const int iz, int jz, const int descz[9], + int& info) { pxhegvx_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, true); } diff --git a/test/unit/eigensolver/test_gen_eigensolver.cpp b/test/unit/eigensolver/test_gen_eigensolver.cpp index 08ba280bbc..3bee309e71 100644 --- a/test/unit/eigensolver/test_gen_eigensolver.cpp +++ b/test/unit/eigensolver/test_gen_eigensolver.cpp @@ -71,7 +71,8 @@ const std::vector> sizes = { {34, 8, 3}, {32, 6, 3} // m > mb, sub-band }; -template +template void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType mb, GridIfDistributed&... grid) { constexpr bool isDistributed = (sizeof...(grid) == 1); @@ -107,19 +108,20 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType MatrixMirror mat_b(mat_b_h); if constexpr (allocation == Allocation::do_allocation) { if constexpr (isDistributed) { - if constexpr (factorization == Factorization::do_factorization){ + if constexpr (factorization == Factorization::do_factorization) { return hermitian_generalized_eigensolver(grid..., uplo, mat_a.get(), mat_b.get()); } - else{ + else { cholesky_factorization(grid..., uplo, mat_b.get()); - return hermitian_generalized_eigensolver_factorized(grid..., uplo, mat_a.get(), mat_b.get()); + return hermitian_generalized_eigensolver_factorized(grid..., uplo, mat_a.get(), + mat_b.get()); } } else { - if constexpr (factorization == Factorization::do_factorization){ + if constexpr (factorization == Factorization::do_factorization) { return hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get()); } - else{ + else { cholesky_factorization(uplo, mat_b.get()); return hermitian_generalized_eigensolver_factorized(uplo, mat_a.get(), mat_b.get()); } @@ -131,26 +133,27 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType TileElementSize(mat_a_h.blockSize().rows(), 1)); if constexpr (isDistributed) { Matrix eigenvectors(GlobalElementSize(size, size), mat_a_h.blockSize(), grid...); - if constexpr (factorization == Factorization::do_factorization){ - hermitian_generalized_eigensolver(grid..., uplo, mat_a.get(), mat_b.get(), eigenvalues, - eigenvectors); + if constexpr (factorization == Factorization::do_factorization) { + hermitian_generalized_eigensolver(grid..., uplo, mat_a.get(), mat_b.get(), eigenvalues, + eigenvectors); } - else{ + else { cholesky_factorization(grid..., uplo, mat_b.get()); - hermitian_generalized_eigensolver_factorized(grid..., uplo, mat_a.get(), mat_b.get(), eigenvalues, - eigenvectors); - + hermitian_generalized_eigensolver_factorized(grid..., uplo, mat_a.get(), mat_b.get(), + eigenvalues, eigenvectors); } return EigensolverResult{std::move(eigenvalues), std::move(eigenvectors)}; } else { Matrix eigenvectors(LocalElementSize(size, size), mat_a_h.blockSize()); - if constexpr (factorization == Factorization::do_factorization){ - hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get(), eigenvalues, eigenvectors); + if constexpr (factorization == Factorization::do_factorization) { + hermitian_generalized_eigensolver(uplo, mat_a.get(), mat_b.get(), eigenvalues, + eigenvectors); } - else{ + else { cholesky_factorization(uplo, mat_b.get()); - hermitian_generalized_eigensolver_factorized(uplo, mat_a.get(), mat_b.get(), eigenvalues, eigenvectors); + hermitian_generalized_eigensolver_factorized(uplo, mat_a.get(), mat_b.get(), eigenvalues, + eigenvectors); } return EigensolverResult{std::move(eigenvalues), std::move(eigenvectors)}; } @@ -167,10 +170,14 @@ TYPED_TEST(GenEigensolverTestMC, CorrectnessLocal) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); } } } @@ -180,10 +187,14 @@ TYPED_TEST(GenEigensolverTestMC, CorrectnessDistributed) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb, grid); - testGenEigensolver(uplo, m, mb, grid); - testGenEigensolver(uplo, m, mb, grid); - testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } @@ -194,10 +205,14 @@ TYPED_TEST(GenEigensolverTestGPU, CorrectnessLocal) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); - testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); + testGenEigensolver(uplo, m, mb); } } } @@ -207,10 +222,14 @@ TYPED_TEST(GenEigensolverTestGPU, CorrectnessDistributed) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { getTuneParameters().eigensolver_min_band = b_min; - testGenEigensolver(uplo, m, mb, grid); - testGenEigensolver(uplo, m, mb, grid); - testGenEigensolver(uplo, m, mb, grid); - testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } From 09ae24c43aca96781eb3653dbc24e2e735a91ae3 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 13:40:57 +0200 Subject: [PATCH 05/15] c api test wrappers --- include/dlaf_c/eigensolver/gen_eigensolver.h | 114 ++++++++++++++++++ .../test_gen_eigensolver_c_api_wrapper.c | 56 +++++++++ .../test_gen_eigensolver_c_api_wrapper.h | 40 ++++++ 3 files changed, 210 insertions(+) diff --git a/include/dlaf_c/eigensolver/gen_eigensolver.h b/include/dlaf_c/eigensolver/gen_eigensolver.h index 0d4dac7ae7..4843dc4d05 100644 --- a/include/dlaf_c/eigensolver/gen_eigensolver.h +++ b/include/dlaf_c/eigensolver/gen_eigensolver.h @@ -58,6 +58,52 @@ DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_z( dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z, const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT; +/// Generalized eigensolver +/// +/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be +/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local +/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally. +/// +/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky +/// factorization +/// +/// @post The pika runtime is resumed when this function is called and suspended when the call +/// terminates. +/// +/// @param dlaf_context context associated to the DLA-Future grid created with @ref dlaf_create_grid +/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix +/// \f$\mathbf{A}\f$ is referenced +/// @param a Local part of the global matrix \f$\mathbf{A}\f$ +/// @param dlaf_desca DLA-Future descriptor of the global matrix \f$\mathbf{A}\f$ +/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$ +/// @param dlaf_descb DLA-Future descriptor of the global matrix \f$\mathbf{B}\f$ +/// @param w Local vector of eigenvalues (non-distributed) +/// @param z Local part of the global matrix \f$\mathbf{Z}\f$ +/// @param dlaf_descz DLA-Future descriptor of the global matrix \f$\mathbf{Z}\f$ +/// @return 0 if the eigensolver completed normally +DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_s( + const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b, + const struct DLAF_descriptor dlaf_descb, float* w, float* z, + const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT; + +/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s +DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_d( + const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca, + double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z, + const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT; + +/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s +DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_c( + const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca, + dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z, + const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT; + +/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s +DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_z( + const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca, + dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z, + const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT; + #ifdef DLAF_WITH_SCALAPACK /// Generalized eigensolver @@ -121,4 +167,72 @@ DLAF_EXTERN_C void dlaf_pzhegvx(const char uplo, const int n, dlaf_complex_z* a, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; +/// Generalized eigensolver +/// +/// @remark This function is only available when DLAF_WITH_SCALAPACK=ON. +/// +/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be +/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local +/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally. +/// +/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky +/// factorization +/// +/// @pre Submatrices are currently not supported, so @p n is the size of the full matrices +/// \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ and @p ia, @p ja, @p ib, @p jb, @p iz, and +/// @p jz need to be 1. +/// +/// @post The pika runtime is resumed when this function is called and suspended when the call +/// terminates. +/// +/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix +/// \f$\mathbf{A}\f$ is referenced +/// @param n order of the sumbatrix \f$\mathbf{A}\f$ used in the computation +/// @param a Local part of the global matrix \f$\mathbf{A}\f$ +/// @param ia row index of the global matrix \f$\mathbf{A}\f$ identifying the first row of the submatrix +/// $A$, has to be 1 +/// @param ja column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the +/// submatrix \f$\mathbf{A}\f$, has to be 1 +/// @param desca ScaLAPACK array descriptor of the global matrix \f$\mathbf{A}\f$ +/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$ +/// @param ib row index of the global matrix \f$\mathbf{B}\f$ identifying the first row of the submatrix +/// \f$\mathbf{B}\f$, has to be 1 +/// @param jb column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the +/// submatrix \f$\mathbf{B}\f$, has to be 1 +/// @param descb ScaLAPACK array descriptor of the global matrix \f$\mathbf{B}\f$ +/// @param w Local vector of eigenvalues (non-distributed) +/// @param z Local part of the global matrix \f$\mathbf{Z}\f$ +/// @param iz row index of the global matrix \f$\mathbf{Z}\f$ identifying the first row of the submatrix +/// \f$\mathbf{Z}\f$, has to be 1 +/// @param jz column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the +/// submatrix \f$\mathbf{A}\f$, has to be 1 +/// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$ +/// @param[out] info 0 if the eigensolver completed normally +DLAF_EXTERN_C void dlaf_pssygvx_factorized(const char uplo, const int n, float* a, const int ia, + const int ja, const int desca[9], float* b, const int ib, + const int jb, const int descb[9], float* w, float* z, + const int iz, const int jz, const int descz[9], + int* info) DLAF_NOEXCEPT; + +/// @copydoc dlaf_pssygvx_factorized +DLAF_EXTERN_C void dlaf_pdsygvx_factorized(const char uplo, const int n, double* a, const int ia, + const int ja, const int desca[9], double* b, const int ib, + const int jb, const int descb[9], double* w, double* z, + const int iz, const int jz, const int descz[9], + int* info) DLAF_NOEXCEPT; + +/// @copydoc dlaf_pssygvx_factorized +DLAF_EXTERN_C void dlaf_pchegvx_factorized(const char uplo, const int n, dlaf_complex_c* a, const int ia, + const int ja, const int desca[9], dlaf_complex_c* b, + const int ib, const int jb, const int descb[9], float* w, + dlaf_complex_c* z, const int iz, const int jz, + const int descz[9], int* info) DLAF_NOEXCEPT; + +/// @copydoc dlaf_pssygvx_factorized +DLAF_EXTERN_C void dlaf_pzhegvx_factorized(const char uplo, const int n, dlaf_complex_z* a, const int ia, + const int ja, const int desca[9], dlaf_complex_z* b, + const int ib, const int jb, const int descb[9], double* w, + dlaf_complex_z* z, const int iz, const int jz, + const int descz[9], int* info) DLAF_NOEXCEPT; + #endif diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c index 7131692b03..f8e0e71355 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c @@ -44,6 +44,36 @@ int C_dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const cha return dlaf_hermitian_generalized_eigensolver_z(dlaf_context, uplo, a, desca, b, descb, w, z, descz); } +int C_dlaf_symmetric_generalized_eigensolver_factorized_s(const int dlaf_context, const char uplo, float* a, + const struct DLAF_descriptor desca, float* b, + const struct DLAF_descriptor descb, float* w, float* z, + const struct DLAF_descriptor descz) { + return dlaf_symmetric_generalized_eigensolver_factorized_s(dlaf_context, uplo, a, desca, b, descb, w, z, descz); +} + +int C_dlaf_symmetric_generalized_eigensolver_factorized_d(const int dlaf_context, const char uplo, double* a, + const struct DLAF_descriptor desca, double* b, + const struct DLAF_descriptor descb, double* w, double* z, + const struct DLAF_descriptor descz) { + return dlaf_symmetric_generalized_eigensolver_factorized_d(dlaf_context, uplo, a, desca, b, descb, w, z, descz); +} + +int C_dlaf_hermitian_generalized_eigensolver_factorized_c(const int dlaf_context, const char uplo, + dlaf_complex_c* a, const struct DLAF_descriptor desca, + dlaf_complex_c* b, const struct DLAF_descriptor descb, + float* w, dlaf_complex_c* z, + const struct DLAF_descriptor descz) { + return dlaf_hermitian_generalized_eigensolver_factorized_c(dlaf_context, uplo, a, desca, b, descb, w, z, descz); +} + +int C_dlaf_hermitian_generalized_eigensolver_factorized_z(const int dlaf_context, const char uplo, + dlaf_complex_z* a, const struct DLAF_descriptor desca, + dlaf_complex_z* b, const struct DLAF_descriptor descb, + double* w, dlaf_complex_z* z, + const struct DLAF_descriptor descz) { + return dlaf_hermitian_generalized_eigensolver_factorized_z(dlaf_context, uplo, a, desca, b, descb, w, z, descz); +} + #ifdef DLAF_WITH_SCALAPACK void C_dlaf_pssygvx(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], @@ -72,4 +102,30 @@ void C_dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int i dlaf_pzhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } +void C_dlaf_pssygvx_factorized(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], + float* b, const int ib, const int jb, const int descb[9], float* w, float* z, + const int iz, const int jz, const int descz[9], int* info) { + dlaf_pssygvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); +} + +void C_dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, const int ja, + const int desca[9], double* b, const int ib, const int jb, const int descb[9], + double* w, double* z, const int iz, const int jz, const int descz[9], int* info) { + dlaf_pdsygvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); +} + +void C_dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, + const int desca[9], dlaf_complex_c* b, const int ib, const int jb, + const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, + const int descz[9], int* info) { + dlaf_pchegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); +} + +void C_dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, + const int desca[9], dlaf_complex_z* b, const int ib, const int jb, + const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, + const int descz[9], int* info) { + dlaf_pzhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); +} + #endif diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h index 58cd021ffe..01e3fa9eef 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h @@ -34,6 +34,24 @@ DLAF_EXTERN_C int C_dlaf_hermitian_generalized_eigensolver_z( dlaf_complex_z* b, const struct DLAF_descriptor descb, double* w, dlaf_complex_z* z, const struct DLAF_descriptor descz); +DLAF_EXTERN_C int C_dlaf_symmetric_generalized_eigensolver_factorized_s( + const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor desca, float* b, + const struct DLAF_descriptor descb, float* w, float* z, const struct DLAF_descriptor descz); + +DLAF_EXTERN_C int C_dlaf_symmetric_generalized_eigensolver_factorized_d( + const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor desca, double* b, + const struct DLAF_descriptor descb, double* w, double* z, const struct DLAF_descriptor descz); + +DLAF_EXTERN_C int C_dlaf_hermitian_generalized_eigensolver_factorized_c( + const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor desca, + dlaf_complex_c* b, const struct DLAF_descriptor descb, float* w, dlaf_complex_c* z, + const struct DLAF_descriptor descz); + +DLAF_EXTERN_C int C_dlaf_hermitian_generalized_eigensolver_factorized_z( + const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor desca, + dlaf_complex_z* b, const struct DLAF_descriptor descb, double* w, dlaf_complex_z* z, + const struct DLAF_descriptor descz); + #ifdef DLAF_WITH_SCALAPACK DLAF_EXTERN_C void C_dlaf_pssygvx(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, @@ -54,4 +72,26 @@ DLAF_EXTERN_C void C_dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info); + +DLAF_EXTERN_C void C_dlaf_pssygvx_factorized(char uplo, const int m, float* a, const int ia, + const int ja, const int desca[9], float* b, const int ib, + const int jb, const int descb[9], float* w, float* z, + const int iz, const int jz, const int descz[9], int* info); + +DLAF_EXTERN_C void C_dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, + const int ja, const int desca[9], double* b, const int ib, + const int jb, const int descb[9], double* w, double* z, + const int iz, const int jz, const int descz[9], int* info); + +DLAF_EXTERN_C void C_dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, + const int ia, const int ja, const int desca[9], + dlaf_complex_c* b, const int ib, const int jb, + const int descb[9], float* w, dlaf_complex_c* z, + const int iz, const int jz, const int descz[9], int* info); + +DLAF_EXTERN_C void C_dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, + const int ia, const int ja, const int desca[9], + dlaf_complex_z* b, const int ib, const int jb, + const int descb[9], double* w, dlaf_complex_z* z, + const int iz, const int jz, const int descz[9], int* info); #endif From 3141ef3b7c56a65e285df48220da819bd4538a00 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 14:33:53 +0200 Subject: [PATCH 06/15] c api tests --- test/unit/c_api/eigensolver/CMakeLists.txt | 1 + .../test_gen_eigensolver_c_api.cpp | 97 +++++++++++++++---- 2 files changed, 77 insertions(+), 21 deletions(-) diff --git a/test/unit/c_api/eigensolver/CMakeLists.txt b/test/unit/c_api/eigensolver/CMakeLists.txt index 372bd2a2a2..6030e9651e 100644 --- a/test/unit/c_api/eigensolver/CMakeLists.txt +++ b/test/unit/c_api/eigensolver/CMakeLists.txt @@ -19,6 +19,7 @@ DLAF_addTest( DLAF_addTest( test_gen_eigensolver_c_api SOURCES test_gen_eigensolver_c_api.cpp test_gen_eigensolver_c_api_wrapper.c + ../factorization/test_cholesky_c_api_wrapper.c LIBRARIES dlaf.c_api USE_MAIN CAPI MPIRANKS 6 diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp index 806678571f..966ef08751 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp @@ -22,6 +22,7 @@ #include #include +#include "../factorization/test_cholesky_c_api_wrapper.h" #include "test_gen_eigensolver_c_api_config.h" #include "test_gen_eigensolver_c_api_wrapper.h" @@ -55,6 +56,8 @@ using GenEigensolverTestGPU = GenEigensolverTest; TYPED_TEST_SUITE(GenEigensolverTestGPU, MatrixElementTypes); #endif +using dlaf::eigensolver::internal::Factorization; + const std::vector blas_uplos({blas::Uplo::Lower}); const std::vector> sizes = { @@ -65,7 +68,7 @@ const std::vector> sizes = { {34, 8, 3}, {32, 6, 3} // m > mb, sub-band }; -template +template void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType mb, CommunicatorGrid& grid) { auto dlaf_context = c_api_test_inititialize(pika_argc, pika_argv, dlaf_argc, dlaf_argv, grid); @@ -131,28 +134,68 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType int err = -1; if constexpr (std::is_same_v) { - err = - C_dlaf_symmetric_generalized_eigensolver_d(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, - local_b_ptr, dlaf_desc_b, eigenvalues_ptr, - local_eigenvectors_ptr, dlaf_desc_eigenvectors); + if (factorization == Factorization::do_factorization) { + err = C_dlaf_symmetric_generalized_eigensolver_d(dlaf_context, dlaf_uplo, local_a_ptr, + dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, + dlaf_desc_eigenvectors); + } + else { + err = C_dlaf_cholesky_factorization_d(dlaf_context, dlaf_uplo, local_b_ptr, dlaf_desc_b); + DLAF_ASSERT(err == 0, err); + + err = C_dlaf_symmetric_generalized_eigensolver_factorized_d( + dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, dlaf_desc_eigenvectors); + } } else if constexpr (std::is_same_v) { - err = - C_dlaf_symmetric_generalized_eigensolver_s(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, - local_b_ptr, dlaf_desc_b, eigenvalues_ptr, - local_eigenvectors_ptr, dlaf_desc_eigenvectors); + if (factorization == Factorization::do_factorization) { + err = C_dlaf_symmetric_generalized_eigensolver_s(dlaf_context, dlaf_uplo, local_a_ptr, + dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, + dlaf_desc_eigenvectors); + } + else { + err = C_dlaf_cholesky_factorization_s(dlaf_context, dlaf_uplo, local_b_ptr, dlaf_desc_b); + DLAF_ASSERT(err == 0, err); + + err = C_dlaf_symmetric_generalized_eigensolver_factorized_s( + dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, dlaf_desc_eigenvectors); + } } else if constexpr (std::is_same_v>) { - err = - C_dlaf_hermitian_generalized_eigensolver_z(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, - local_b_ptr, dlaf_desc_b, eigenvalues_ptr, - local_eigenvectors_ptr, dlaf_desc_eigenvectors); + if (factorization == Factorization::do_factorization) { + err = C_dlaf_hermitian_generalized_eigensolver_z(dlaf_context, dlaf_uplo, local_a_ptr, + dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, + dlaf_desc_eigenvectors); + } + else { + err = C_dlaf_cholesky_factorization_z(dlaf_context, dlaf_uplo, local_b_ptr, dlaf_desc_b); + DLAF_ASSERT(err == 0, err); + + err = C_dlaf_hermitian_generalized_eigensolver_factorized_z( + dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, dlaf_desc_eigenvectors); + } } else if constexpr (std::is_same_v>) { - err = - C_dlaf_hermitian_generalized_eigensolver_c(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, - local_b_ptr, dlaf_desc_b, eigenvalues_ptr, - local_eigenvectors_ptr, dlaf_desc_eigenvectors); + if (factorization == Factorization::do_factorization) { + err = C_dlaf_hermitian_generalized_eigensolver_c(dlaf_context, dlaf_uplo, local_a_ptr, + dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, + dlaf_desc_eigenvectors); + } + else { + err = C_dlaf_cholesky_factorization_c(dlaf_context, dlaf_uplo, local_b_ptr, dlaf_desc_b); + DLAF_ASSERT(err == 0, err); + + err = C_dlaf_hermitian_generalized_eigensolver_factorized_c( + dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, dlaf_desc_eigenvectors); + } } else { DLAF_ASSERT(false, typeid(T).name()); @@ -209,7 +252,10 @@ TYPED_TEST(GenEigensolverTestMC, CorrectnessDistributedDLAF) { for (comm::CommunicatorGrid& grid : this->commGrids()) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { - testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } @@ -220,7 +266,10 @@ TYPED_TEST(GenEigensolverTestGPU, CorrectnessDistributedDLAF) { for (comm::CommunicatorGrid& grid : this->commGrids()) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { - testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } @@ -233,7 +282,10 @@ TYPED_TEST(GenEigensolverTestMC, CorrectnessDistributedScaLAPACK) { for (comm::CommunicatorGrid& grid : this->commGrids()) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { - testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } @@ -244,7 +296,10 @@ TYPED_TEST(GenEigensolverTestGPU, CorrectnessDistributedScaLAPACK) { for (comm::CommunicatorGrid& grid : this->commGrids()) { for (auto uplo : blas_uplos) { for (auto [m, mb, b_min] : sizes) { - testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); + testGenEigensolver(uplo, m, mb, grid); } } } From 838c50387d50c68cd74c2fc9360025970da89517 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Mon, 17 Jun 2024 15:36:26 +0200 Subject: [PATCH 07/15] Apply suggestions from code review --- include/dlaf/eigensolver/gen_eigensolver/api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dlaf/eigensolver/gen_eigensolver/api.h b/include/dlaf/eigensolver/gen_eigensolver/api.h index 648d6d58e0..d342cacbf5 100644 --- a/include/dlaf/eigensolver/gen_eigensolver/api.h +++ b/include/dlaf/eigensolver/gen_eigensolver/api.h @@ -25,7 +25,7 @@ struct GenEigensolver { const Factorization factorization); static void call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, device>& eigenvalues, - Matrix& eigenvectors, const Factorization factorizatio); + Matrix& eigenvectors, const Factorization factorization); }; // ETI From dfdca22357c50fc63b8373cbc3b6e75281d178de Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Tue, 18 Jun 2024 12:26:23 +0000 Subject: [PATCH 08/15] helper --- include/dlaf/eigensolver/gen_eigensolver.h | 138 ++++++++++----------- 1 file changed, 65 insertions(+), 73 deletions(-) diff --git a/include/dlaf/eigensolver/gen_eigensolver.h b/include/dlaf/eigensolver/gen_eigensolver.h index 0ba4f8032d..7cdb599d54 100644 --- a/include/dlaf/eigensolver/gen_eigensolver.h +++ b/include/dlaf/eigensolver/gen_eigensolver.h @@ -57,6 +57,29 @@ void hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat factorization); } +template +EigensolverResult hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat_a, + Matrix& mat_b, const Factorization factorization) { + DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); + DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); + DLAF_ASSERT(matrix::square_size(mat_a), mat_a); + DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(matrix::square_size(mat_b), mat_b); + DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); + DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); + DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); + + const SizeType size = mat_a.size().rows(); + + matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), + TileElementSize(mat_a.blockSize().rows(), 1)); + matrix::Matrix eigenvectors(LocalElementSize(size, size), mat_a.blockSize()); + + hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); + + return {std::move(eigenvalues), std::move(eigenvectors)}; +} + template void hermitian_generalized_eigensolver_helper( comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, @@ -87,6 +110,29 @@ void hermitian_generalized_eigensolver_helper( eigenvectors, factorization); } +template +EigensolverResult hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, blas::Uplo uplo, + Matrix& mat_a, Matrix& mat_b, const Factorization factorization) { + DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); + DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); + DLAF_ASSERT(matrix::square_size(mat_a), mat_a); + DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(matrix::square_size(mat_b), mat_b); + DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); + DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); + DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); + + const SizeType size = mat_a.size().rows(); + + matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), + TileElementSize(mat_a.blockSize().rows(), 1)); + matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); + + hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); + + return {std::move(eigenvalues), std::move(eigenvectors)}; +} + } /// Generalized Eigensolver. @@ -130,9 +176,11 @@ void hermitian_generalized_eigensolver_helper( template void hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { + using eigensolver::internal::Factorization; + eigensolver::internal::hermitian_generalized_eigensolver_helper( uplo, mat_a, mat_b, eigenvalues, eigenvectors, - eigensolver::internal::Factorization::do_factorization); + Factorization::do_factorization); } /// Generalized Eigensolver. @@ -163,24 +211,9 @@ void hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Mat template EigensolverResult hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { - DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); - DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); - DLAF_ASSERT(matrix::square_size(mat_a), mat_a); - DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); - DLAF_ASSERT(matrix::square_size(mat_b), mat_b); - DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); - DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); - DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); - - const SizeType size = mat_a.size().rows(); + using eigensolver::internal::Factorization; - matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), - TileElementSize(mat_a.blockSize().rows(), 1)); - matrix::Matrix eigenvectors(LocalElementSize(size, size), mat_a.blockSize()); - - hermitian_generalized_eigensolver(uplo, mat_a, mat_b, eigenvalues, eigenvectors); - - return {std::move(eigenvalues), std::move(eigenvectors)}; + return eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, Factorization::do_factorization); } /// Generalized Eigensolver. @@ -227,9 +260,11 @@ void hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { + using eigensolver::internal::Factorization; + eigensolver::internal::hermitian_generalized_eigensolver_helper( uplo, mat_a, mat_b, eigenvalues, eigenvectors, - eigensolver::internal::Factorization::already_factorized); + Factorization::already_factorized); } /// Generalized Eigensolver. @@ -262,24 +297,9 @@ template EigensolverResult hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { - DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); - DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); - DLAF_ASSERT(matrix::square_size(mat_a), mat_a); - DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); - DLAF_ASSERT(matrix::square_size(mat_b), mat_b); - DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); - DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); - DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); - - const SizeType size = mat_a.size().rows(); + using eigensolver::internal::Factorization; - matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), - TileElementSize(mat_a.blockSize().rows(), 1)); - matrix::Matrix eigenvectors(LocalElementSize(size, size), mat_a.blockSize()); - - hermitian_generalized_eigensolver_factorized(uplo, mat_a, mat_b, eigenvalues, eigenvectors); - - return {std::move(eigenvalues), std::move(eigenvectors)}; + return eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, Factorization::already_factorized); } /// Generalized Eigensolver. @@ -325,9 +345,11 @@ template void hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { + using eigensolver::internal::Factorization; + eigensolver::internal::hermitian_generalized_eigensolver_helper( grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, - eigensolver::internal::Factorization::do_factorization); + Factorization::do_factorization); } /// Generalized Eigensolver. @@ -359,24 +381,9 @@ void hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo template EigensolverResult hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { - DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); - DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); - DLAF_ASSERT(matrix::square_size(mat_a), mat_a); - DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); - DLAF_ASSERT(matrix::square_size(mat_b), mat_b); - DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); - DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); - DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); - - const SizeType size = mat_a.size().rows(); + using eigensolver::internal::Factorization; - matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), - TileElementSize(mat_a.blockSize().rows(), 1)); - matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); - - hermitian_generalized_eigensolver(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors); - - return {std::move(eigenvalues), std::move(eigenvectors)}; + return eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, Factorization::do_factorization); } /// Generalized Eigensolver. @@ -424,9 +431,10 @@ void hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, Matrix& mat_a, Matrix& mat_b, Matrix, D>& eigenvalues, Matrix& eigenvectors) { + using eigensolver::internal::Factorization; eigensolver::internal::hermitian_generalized_eigensolver_helper( grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, - eigensolver::internal::Factorization::already_factorized); + Factorization::already_factorized); } /// Generalized Eigensolver. @@ -459,24 +467,8 @@ void hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, template EigensolverResult hermitian_generalized_eigensolver_factorized( comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { - DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); - DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); - DLAF_ASSERT(matrix::square_size(mat_a), mat_a); - DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); - DLAF_ASSERT(matrix::square_size(mat_b), mat_b); - DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); - DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); - DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); - - const SizeType size = mat_a.size().rows(); + using eigensolver::internal::Factorization; - matrix::Matrix, D> eigenvalues(LocalElementSize(size, 1), - TileElementSize(mat_a.blockSize().rows(), 1)); - matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); - - hermitian_generalized_eigensolver_factorized(grid, uplo, mat_a, mat_b, eigenvalues, - eigenvectors); - - return {std::move(eigenvalues), std::move(eigenvectors)}; + return eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, Factorization::already_factorized); } } From 5c7c7fe474f0723f1bb4ffb29b09f5628eafb611 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Tue, 18 Jun 2024 12:28:27 +0000 Subject: [PATCH 09/15] comment --- include/dlaf/eigensolver/gen_to_std.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/dlaf/eigensolver/gen_to_std.h b/include/dlaf/eigensolver/gen_to_std.h index 7dbb2299f8..b2389f36b9 100644 --- a/include/dlaf/eigensolver/gen_to_std.h +++ b/include/dlaf/eigensolver/gen_to_std.h @@ -38,7 +38,8 @@ namespace dlaf::eigensolver::internal { /// @pre @p mat_a has blocksize (NB x NB) /// @pre @p mat_a has tilesize (NB x NB) /// -/// @param mat_b contains the triangular matrix. It can be lower (L) or upper (U). Only the tiles of +/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B +/// The triangular matrix. It can be lower (L) or upper (U). Only the tiles of /// the matrix which contain the lower triangular or the upper triangular part are accessed. /// Note: B should be modifiable as the diagonal tiles might be temporarily modified during the calculation. /// @pre @p mat_b is not distributed From 2cce9b95ebe1b3cfae764493ea8e2b51198f6017 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Tue, 18 Jun 2024 12:37:42 +0000 Subject: [PATCH 10/15] fix namings --- include/dlaf_c/eigensolver/gen_eigensolver.h | 16 +++++----- src/c_api/eigensolver/gen_eigensolver.cpp | 32 +++++++++---------- src/c_api/eigensolver/gen_eigensolver.h | 10 +++--- .../test_gen_eigensolver_c_api.cpp | 8 ++--- .../test_gen_eigensolver_c_api_wrapper.c | 32 +++++++++---------- .../test_gen_eigensolver_c_api_wrapper.h | 16 +++++----- 6 files changed, 57 insertions(+), 57 deletions(-) diff --git a/include/dlaf_c/eigensolver/gen_eigensolver.h b/include/dlaf_c/eigensolver/gen_eigensolver.h index 4843dc4d05..89a94db88f 100644 --- a/include/dlaf_c/eigensolver/gen_eigensolver.h +++ b/include/dlaf_c/eigensolver/gen_eigensolver.h @@ -144,25 +144,25 @@ DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_z( /// submatrix \f$\mathbf{A}\f$, has to be 1 /// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$ /// @param[out] info 0 if the eigensolver completed normally -DLAF_EXTERN_C void dlaf_pssygvx(const char uplo, const int n, float* a, const int ia, const int ja, +DLAF_EXTERN_C void dlaf_pssygvd(const char uplo, const int n, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx -DLAF_EXTERN_C void dlaf_pdsygvx(const char uplo, const int n, double* a, const int ia, const int ja, +DLAF_EXTERN_C void dlaf_pdsygvd(const char uplo, const int n, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx -DLAF_EXTERN_C void dlaf_pchegvx(const char uplo, const int n, dlaf_complex_c* a, const int ia, +DLAF_EXTERN_C void dlaf_pchegvd(const char uplo, const int n, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx -DLAF_EXTERN_C void dlaf_pzhegvx(const char uplo, const int n, dlaf_complex_z* a, const int ia, +DLAF_EXTERN_C void dlaf_pzhegvd(const char uplo, const int n, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; @@ -208,28 +208,28 @@ DLAF_EXTERN_C void dlaf_pzhegvx(const char uplo, const int n, dlaf_complex_z* a, /// submatrix \f$\mathbf{A}\f$, has to be 1 /// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$ /// @param[out] info 0 if the eigensolver completed normally -DLAF_EXTERN_C void dlaf_pssygvx_factorized(const char uplo, const int n, float* a, const int ia, +DLAF_EXTERN_C void dlaf_pssygvd_factorized(const char uplo, const int n, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx_factorized -DLAF_EXTERN_C void dlaf_pdsygvx_factorized(const char uplo, const int n, double* a, const int ia, +DLAF_EXTERN_C void dlaf_pdsygvd_factorized(const char uplo, const int n, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx_factorized -DLAF_EXTERN_C void dlaf_pchegvx_factorized(const char uplo, const int n, dlaf_complex_c* a, const int ia, +DLAF_EXTERN_C void dlaf_pchegvd_factorized(const char uplo, const int n, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx_factorized -DLAF_EXTERN_C void dlaf_pzhegvx_factorized(const char uplo, const int n, dlaf_complex_z* a, const int ia, +DLAF_EXTERN_C void dlaf_pzhegvd_factorized(const char uplo, const int n, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, diff --git a/src/c_api/eigensolver/gen_eigensolver.cpp b/src/c_api/eigensolver/gen_eigensolver.cpp index 1bed9a8396..e73cd8ef07 100644 --- a/src/c_api/eigensolver/gen_eigensolver.cpp +++ b/src/c_api/eigensolver/gen_eigensolver.cpp @@ -88,60 +88,60 @@ int dlaf_hermitian_generalized_eigensolver_factorized_z( #ifdef DLAF_WITH_SCALAPACK -void dlaf_pssygvx(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], +void dlaf_pssygvd(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja, +void dlaf_pdsygvd(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, +void dlaf_pchegvd(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvd>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, +void dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvd>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pssygvx_factorized(const char uplo, const int m, float* a, const int ia, const int ja, +void dlaf_pssygvd_factorized(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvd_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, const int ja, +void dlaf_pdsygvd_factorized(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvd_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, +void dlaf_pchegvd_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, + pxhegvd_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, +void dlaf_pzhegvd_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvx_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, + pxhegvd_factorized>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } diff --git a/src/c_api/eigensolver/gen_eigensolver.h b/src/c_api/eigensolver/gen_eigensolver.h index 855320c60e..b3cc3ff385 100644 --- a/src/c_api/eigensolver/gen_eigensolver.h +++ b/src/c_api/eigensolver/gen_eigensolver.h @@ -107,7 +107,7 @@ int hermitian_generalized_eigensolver_factorized(const int dlaf_context, const c #ifdef DLAF_WITH_SCALAPACK template -void pxhegvx_helper(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], +void pxhegvd_helper(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, int jz, const int descz[9], int& info, bool factorized) { DLAF_ASSERT(desca[0] == 1, desca[0]); @@ -137,18 +137,18 @@ void pxhegvx_helper(const char uplo, const int m, T* a, const int ia, const int } template -void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, +void pxhegvd(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, int jz, const int descz[9], int& info) { - pxhegvx_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, false); + pxhegvd_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, false); } template -void pxhegvx_factorized(const char uplo, const int m, T* a, const int ia, const int ja, +void pxhegvd_factorized(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, int jz, const int descz[9], int& info) { - pxhegvx_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, true); + pxhegvd_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, true); } #endif diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp index 966ef08751..b71e43d6e1 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp @@ -209,19 +209,19 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType int desc_z[] = {1, dlaf_context, (int) m, (int) m, (int) mb, (int) mb, 0, 0, lld_eigenvectors}; int info = -1; if constexpr (std::is_same_v) { - C_dlaf_pdsygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pdsygvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else if constexpr (std::is_same_v) { - C_dlaf_pssygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pssygvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else if constexpr (std::is_same_v>) { - C_dlaf_pzhegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pzhegvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else if constexpr (std::is_same_v>) { - C_dlaf_pchegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pchegvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else { diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c index f8e0e71355..6881bfb4ab 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c @@ -76,56 +76,56 @@ int C_dlaf_hermitian_generalized_eigensolver_factorized_z(const int dlaf_context #ifdef DLAF_WITH_SCALAPACK -void C_dlaf_pssygvx(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], +void C_dlaf_pssygvd(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pssygvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pssygvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja, +void C_dlaf_pdsygvd(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pdsygvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pdsygvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, +void C_dlaf_pchegvd(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pchegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pchegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, +void C_dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pzhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pzhegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pssygvx_factorized(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], +void C_dlaf_pssygvd_factorized(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pssygvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pssygvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, const int ja, +void C_dlaf_pdsygvd_factorized(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pdsygvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pdsygvd_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, +void C_dlaf_pchegvd_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pchegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pchegvd_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, +void C_dlaf_pzhegvd_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pzhegvx_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pzhegvd_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } #endif diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h index 01e3fa9eef..c04a73da60 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h @@ -53,43 +53,43 @@ DLAF_EXTERN_C int C_dlaf_hermitian_generalized_eigensolver_factorized_z( const struct DLAF_descriptor descz); #ifdef DLAF_WITH_SCALAPACK -DLAF_EXTERN_C void C_dlaf_pssygvx(char uplo, const int m, float* a, const int ia, const int ja, +DLAF_EXTERN_C void C_dlaf_pssygvd(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja, +DLAF_EXTERN_C void C_dlaf_pdsygvd(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, +DLAF_EXTERN_C void C_dlaf_pchegvd(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, +DLAF_EXTERN_C void C_dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pssygvx_factorized(char uplo, const int m, float* a, const int ia, +DLAF_EXTERN_C void C_dlaf_pssygvd_factorized(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pdsygvx_factorized(const char uplo, const int m, double* a, const int ia, +DLAF_EXTERN_C void C_dlaf_pdsygvd_factorized(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pchegvx_factorized(const char uplo, const int m, dlaf_complex_c* a, +DLAF_EXTERN_C void C_dlaf_pchegvd_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pzhegvx_factorized(const char uplo, const int m, dlaf_complex_z* a, +DLAF_EXTERN_C void C_dlaf_pzhegvd_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, From cff5b719d2db7b745385c5bdf1b5b2458e1baaa8 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Tue, 18 Jun 2024 12:41:09 +0000 Subject: [PATCH 11/15] format --- include/dlaf/eigensolver/gen_eigensolver.h | 39 ++++++++++++---------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/include/dlaf/eigensolver/gen_eigensolver.h b/include/dlaf/eigensolver/gen_eigensolver.h index 7cdb599d54..a36af8d931 100644 --- a/include/dlaf/eigensolver/gen_eigensolver.h +++ b/include/dlaf/eigensolver/gen_eigensolver.h @@ -59,7 +59,8 @@ void hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat template EigensolverResult hermitian_generalized_eigensolver_helper(blas::Uplo uplo, Matrix& mat_a, - Matrix& mat_b, const Factorization factorization) { + Matrix& mat_b, + const Factorization factorization) { DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); DLAF_ASSERT(matrix::square_size(mat_a), mat_a); @@ -75,7 +76,8 @@ EigensolverResult hermitian_generalized_eigensolver_helper(blas::Uplo uplo TileElementSize(mat_a.blockSize().rows(), 1)); matrix::Matrix eigenvectors(LocalElementSize(size, size), mat_a.blockSize()); - hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); + hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, eigenvalues, eigenvectors, + factorization); return {std::move(eigenvalues), std::move(eigenvectors)}; } @@ -111,8 +113,10 @@ void hermitian_generalized_eigensolver_helper( } template -EigensolverResult hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, blas::Uplo uplo, - Matrix& mat_a, Matrix& mat_b, const Factorization factorization) { +EigensolverResult hermitian_generalized_eigensolver_helper(comm::CommunicatorGrid& grid, + blas::Uplo uplo, Matrix& mat_a, + Matrix& mat_b, + const Factorization factorization) { DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); DLAF_ASSERT(matrix::square_size(mat_a), mat_a); @@ -128,7 +132,8 @@ EigensolverResult hermitian_generalized_eigensolver_helper(comm::Communica TileElementSize(mat_a.blockSize().rows(), 1)); matrix::Matrix eigenvectors(GlobalElementSize(size, size), mat_a.blockSize(), grid); - hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, factorization); + hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, + factorization); return {std::move(eigenvalues), std::move(eigenvectors)}; } @@ -179,8 +184,7 @@ void hermitian_generalized_eigensolver(blas::Uplo uplo, Matrix& mat_a, Mat using eigensolver::internal::Factorization; eigensolver::internal::hermitian_generalized_eigensolver_helper( - uplo, mat_a, mat_b, eigenvalues, eigenvectors, - Factorization::do_factorization); + uplo, mat_a, mat_b, eigenvalues, eigenvectors, Factorization::do_factorization); } /// Generalized Eigensolver. @@ -213,7 +217,8 @@ EigensolverResult hermitian_generalized_eigensolver(blas::Uplo uplo, Matri Matrix& mat_b) { using eigensolver::internal::Factorization; - return eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, Factorization::do_factorization); + return eigensolver::internal::hermitian_generalized_eigensolver_helper( + uplo, mat_a, mat_b, Factorization::do_factorization); } /// Generalized Eigensolver. @@ -263,8 +268,7 @@ void hermitian_generalized_eigensolver_factorized(blas::Uplo uplo, Matrix& using eigensolver::internal::Factorization; eigensolver::internal::hermitian_generalized_eigensolver_helper( - uplo, mat_a, mat_b, eigenvalues, eigenvectors, - Factorization::already_factorized); + uplo, mat_a, mat_b, eigenvalues, eigenvectors, Factorization::already_factorized); } /// Generalized Eigensolver. @@ -299,7 +303,8 @@ EigensolverResult hermitian_generalized_eigensolver_factorized(blas::Uplo Matrix& mat_b) { using eigensolver::internal::Factorization; - return eigensolver::internal::hermitian_generalized_eigensolver_helper(uplo, mat_a, mat_b, Factorization::already_factorized); + return eigensolver::internal::hermitian_generalized_eigensolver_helper( + uplo, mat_a, mat_b, Factorization::already_factorized); } /// Generalized Eigensolver. @@ -348,8 +353,7 @@ void hermitian_generalized_eigensolver(comm::CommunicatorGrid& grid, blas::Uplo using eigensolver::internal::Factorization; eigensolver::internal::hermitian_generalized_eigensolver_helper( - grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, - Factorization::do_factorization); + grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, Factorization::do_factorization); } /// Generalized Eigensolver. @@ -383,7 +387,8 @@ EigensolverResult hermitian_generalized_eigensolver(comm::CommunicatorGrid Matrix& mat_a, Matrix& mat_b) { using eigensolver::internal::Factorization; - return eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, Factorization::do_factorization); + return eigensolver::internal::hermitian_generalized_eigensolver_helper( + grid, uplo, mat_a, mat_b, Factorization::do_factorization); } /// Generalized Eigensolver. @@ -433,8 +438,7 @@ void hermitian_generalized_eigensolver_factorized(comm::CommunicatorGrid& grid, Matrix& eigenvectors) { using eigensolver::internal::Factorization; eigensolver::internal::hermitian_generalized_eigensolver_helper( - grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, - Factorization::already_factorized); + grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors, Factorization::already_factorized); } /// Generalized Eigensolver. @@ -469,6 +473,7 @@ EigensolverResult hermitian_generalized_eigensolver_factorized( comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { using eigensolver::internal::Factorization; - return eigensolver::internal::hermitian_generalized_eigensolver_helper(grid, uplo, mat_a, mat_b, Factorization::already_factorized); + return eigensolver::internal::hermitian_generalized_eigensolver_helper( + grid, uplo, mat_a, mat_b, Factorization::already_factorized); } } From a6274a44700999f1a04ef6f67fc1dc9d9e44f2a3 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Wed, 19 Jun 2024 09:02:02 +0200 Subject: [PATCH 12/15] Update include/dlaf/eigensolver/gen_to_std.h --- include/dlaf/eigensolver/gen_to_std.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dlaf/eigensolver/gen_to_std.h b/include/dlaf/eigensolver/gen_to_std.h index b2389f36b9..61a5be8c31 100644 --- a/include/dlaf/eigensolver/gen_to_std.h +++ b/include/dlaf/eigensolver/gen_to_std.h @@ -39,7 +39,7 @@ namespace dlaf::eigensolver::internal { /// @pre @p mat_a has tilesize (NB x NB) /// /// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B -/// The triangular matrix. It can be lower (L) or upper (U). Only the tiles of +/// The triangular matrix can be lower (L) or upper (U). Only the tiles of /// the matrix which contain the lower triangular or the upper triangular part are accessed. /// Note: B should be modifiable as the diagonal tiles might be temporarily modified during the calculation. /// @pre @p mat_b is not distributed From aa0a2359eb3c763b373263dbac0a8e85d34696a0 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Wed, 19 Jun 2024 09:03:07 +0200 Subject: [PATCH 13/15] revert changes from other PR --- include/dlaf_c/eigensolver/gen_eigensolver.h | 8 ++++---- src/c_api/eigensolver/gen_eigensolver.cpp | 14 +++++++------- src/c_api/eigensolver/gen_eigensolver.h | 2 +- .../eigensolver/test_gen_eigensolver_c_api.cpp | 8 ++++---- .../test_gen_eigensolver_c_api_wrapper.c | 18 +++++++++--------- .../test_gen_eigensolver_c_api_wrapper.h | 8 ++++---- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/include/dlaf_c/eigensolver/gen_eigensolver.h b/include/dlaf_c/eigensolver/gen_eigensolver.h index 89a94db88f..719c3a06c3 100644 --- a/include/dlaf_c/eigensolver/gen_eigensolver.h +++ b/include/dlaf_c/eigensolver/gen_eigensolver.h @@ -144,25 +144,25 @@ DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_z( /// submatrix \f$\mathbf{A}\f$, has to be 1 /// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$ /// @param[out] info 0 if the eigensolver completed normally -DLAF_EXTERN_C void dlaf_pssygvd(const char uplo, const int n, float* a, const int ia, const int ja, +DLAF_EXTERN_C void dlaf_pssygvx(const char uplo, const int n, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx -DLAF_EXTERN_C void dlaf_pdsygvd(const char uplo, const int n, double* a, const int ia, const int ja, +DLAF_EXTERN_C void dlaf_pdsygvx(const char uplo, const int n, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx -DLAF_EXTERN_C void dlaf_pchegvd(const char uplo, const int n, dlaf_complex_c* a, const int ia, +DLAF_EXTERN_C void dlaf_pchegvx(const char uplo, const int n, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; /// @copydoc dlaf_pssygvx -DLAF_EXTERN_C void dlaf_pzhegvd(const char uplo, const int n, dlaf_complex_z* a, const int ia, +DLAF_EXTERN_C void dlaf_pzhegvx(const char uplo, const int n, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT; diff --git a/src/c_api/eigensolver/gen_eigensolver.cpp b/src/c_api/eigensolver/gen_eigensolver.cpp index e73cd8ef07..85e7f5ca4c 100644 --- a/src/c_api/eigensolver/gen_eigensolver.cpp +++ b/src/c_api/eigensolver/gen_eigensolver.cpp @@ -91,28 +91,28 @@ int dlaf_hermitian_generalized_eigensolver_factorized_z( void dlaf_pssygvd(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pdsygvd(const char uplo, const int m, double* a, const int ia, const int ja, +void dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pchegvd(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, +void dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvd>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvx>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } -void dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, +void dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) noexcept { - pxhegvd>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); + pxhegvx>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); } void dlaf_pssygvd_factorized(const char uplo, const int m, float* a, const int ia, const int ja, diff --git a/src/c_api/eigensolver/gen_eigensolver.h b/src/c_api/eigensolver/gen_eigensolver.h index b3cc3ff385..a1ea38bc14 100644 --- a/src/c_api/eigensolver/gen_eigensolver.h +++ b/src/c_api/eigensolver/gen_eigensolver.h @@ -137,7 +137,7 @@ void pxhegvd_helper(const char uplo, const int m, T* a, const int ia, const int } template -void pxhegvd(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, +void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b, const int ib, const int jb, const int descb[9], dlaf::BaseType* w, T* z, const int iz, int jz, const int descz[9], int& info) { pxhegvd_helper(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info, false); diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp index b71e43d6e1..966ef08751 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp @@ -209,19 +209,19 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType int desc_z[] = {1, dlaf_context, (int) m, (int) m, (int) mb, (int) mb, 0, 0, lld_eigenvectors}; int info = -1; if constexpr (std::is_same_v) { - C_dlaf_pdsygvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pdsygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else if constexpr (std::is_same_v) { - C_dlaf_pssygvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pssygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else if constexpr (std::is_same_v>) { - C_dlaf_pzhegvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pzhegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else if constexpr (std::is_same_v>) { - C_dlaf_pchegvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + C_dlaf_pchegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else { diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c index 6881bfb4ab..e3582c0331 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.c @@ -76,36 +76,36 @@ int C_dlaf_hermitian_generalized_eigensolver_factorized_z(const int dlaf_context #ifdef DLAF_WITH_SCALAPACK -void C_dlaf_pssygvd(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], +void C_dlaf_pssygvx(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pssygvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pssygvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pdsygvd(const char uplo, const int m, double* a, const int ia, const int ja, +void C_dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pdsygvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pdsygvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pchegvd(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, +void C_dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pchegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pchegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } -void C_dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, +void C_dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pzhegvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pzhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } void C_dlaf_pssygvd_factorized(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) { - dlaf_pssygvd(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); + dlaf_pssygvd_factorized(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, info); } void C_dlaf_pdsygvd_factorized(const char uplo, const int m, double* a, const int ia, const int ja, diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h index c04a73da60..d313b09cb4 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api_wrapper.h @@ -53,22 +53,22 @@ DLAF_EXTERN_C int C_dlaf_hermitian_generalized_eigensolver_factorized_z( const struct DLAF_descriptor descz); #ifdef DLAF_WITH_SCALAPACK -DLAF_EXTERN_C void C_dlaf_pssygvd(char uplo, const int m, float* a, const int ia, const int ja, +DLAF_EXTERN_C void C_dlaf_pssygvx(char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pdsygvd(const char uplo, const int m, double* a, const int ia, const int ja, +DLAF_EXTERN_C void C_dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja, const int desca[9], double* b, const int ib, const int jb, const int descb[9], double* w, double* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pchegvd(const char uplo, const int m, dlaf_complex_c* a, const int ia, +DLAF_EXTERN_C void C_dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja, const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9], int* info); -DLAF_EXTERN_C void C_dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia, +DLAF_EXTERN_C void C_dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja, const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9], double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9], int* info); From c036c7c6400879958d658cde1513d4b6c3b74183 Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Wed, 19 Jun 2024 10:02:25 +0200 Subject: [PATCH 14/15] add scalapack tests --- src/c_api/eigensolver/gen_eigensolver.cpp | 2 +- .../test_gen_eigensolver_c_api.cpp | 60 ++++++++++++++++--- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/c_api/eigensolver/gen_eigensolver.cpp b/src/c_api/eigensolver/gen_eigensolver.cpp index 85e7f5ca4c..9307ffa014 100644 --- a/src/c_api/eigensolver/gen_eigensolver.cpp +++ b/src/c_api/eigensolver/gen_eigensolver.cpp @@ -88,7 +88,7 @@ int dlaf_hermitian_generalized_eigensolver_factorized_z( #ifdef DLAF_WITH_SCALAPACK -void dlaf_pssygvd(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], +void dlaf_pssygvx(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9], float* b, const int ib, const int jb, const int descb[9], float* w, float* z, const int iz, const int jz, const int descz[9], int* info) noexcept { pxhegvx(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info); diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp index 966ef08751..8c49a905e9 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp @@ -209,20 +209,64 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType int desc_z[] = {1, dlaf_context, (int) m, (int) m, (int) mb, (int) mb, 0, 0, lld_eigenvectors}; int info = -1; if constexpr (std::is_same_v) { - C_dlaf_pdsygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, - eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + if (factorization == Factorization::do_factorization) { + C_dlaf_pdsygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + } + else { + C_dlaf_pdpotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + DLAF_ASSERT(info == 0, info); + info = -1; + + C_dlaf_pdsygvd_factorized(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, + desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, + &info); + } } else if constexpr (std::is_same_v) { - C_dlaf_pssygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, - eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + if (factorization == Factorization::do_factorization) { + C_dlaf_pssygvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + } + else { + C_dlaf_pspotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + DLAF_ASSERT(info == 0, info); + info = -1; + + C_dlaf_pssygvd_factorized(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, + desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, + &info); + } } else if constexpr (std::is_same_v>) { - C_dlaf_pzhegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, - eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + if (factorization == Factorization::do_factorization) { + C_dlaf_pzhegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + } + else { + C_dlaf_pzpotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + DLAF_ASSERT(info == 0, info); + info = -1; + + C_dlaf_pzhegvd_factorized(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, + desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, + &info); + } } else if constexpr (std::is_same_v>) { - C_dlaf_pchegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, - eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + if (factorization == Factorization::do_factorization) { + C_dlaf_pchegvx(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, + eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); + } + else { + C_dlaf_pcpotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + DLAF_ASSERT(info == 0, info); + info = -1; + + C_dlaf_pchegvd_factorized(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, + desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, + &info); + } } else { DLAF_ASSERT(false, typeid(T).name()); From c993ef824ef0aa8adfc96ae2f14595b8e32f2e0a Mon Sep 17 00:00:00 2001 From: Rocco Meli Date: Fri, 21 Jun 2024 10:34:48 +0000 Subject: [PATCH 15/15] add constexpr and fix scalapack test --- .../test_gen_eigensolver_c_api.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp index 5a26b76226..f247d49582 100644 --- a/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp +++ b/test/unit/c_api/eigensolver/test_gen_eigensolver_c_api.cpp @@ -134,7 +134,7 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType int err = -1; if constexpr (std::is_same_v) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { err = C_dlaf_symmetric_generalized_eigensolver_d(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, eigenvalues_ptr, local_eigenvectors_ptr, @@ -150,7 +150,7 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType } } else if constexpr (std::is_same_v) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { err = C_dlaf_symmetric_generalized_eigensolver_s(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, eigenvalues_ptr, local_eigenvectors_ptr, @@ -166,7 +166,7 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType } } else if constexpr (std::is_same_v>) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { err = C_dlaf_hermitian_generalized_eigensolver_z(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, eigenvalues_ptr, local_eigenvectors_ptr, @@ -182,7 +182,7 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType } } else if constexpr (std::is_same_v>) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { err = C_dlaf_hermitian_generalized_eigensolver_c(dlaf_context, dlaf_uplo, local_a_ptr, dlaf_desc_a, local_b_ptr, dlaf_desc_b, eigenvalues_ptr, local_eigenvectors_ptr, @@ -209,12 +209,12 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType int desc_z[] = {1, dlaf_context, (int) m, (int) m, (int) mb, (int) mb, 0, 0, lld_eigenvectors}; int info = -1; if constexpr (std::is_same_v) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { C_dlaf_pdsygvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else { - C_dlaf_pdpotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + C_dlaf_pdpotrf(dlaf_uplo, (int) m, local_b_ptr, 1, 1, desc_b, &info); DLAF_ASSERT(info == 0, info); info = -1; @@ -224,12 +224,12 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType } } else if constexpr (std::is_same_v) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { C_dlaf_pssygvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else { - C_dlaf_pspotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + C_dlaf_pspotrf(dlaf_uplo, (int) m, local_b_ptr, 1, 1, desc_b, &info); DLAF_ASSERT(info == 0, info); info = -1; @@ -239,12 +239,12 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType } } else if constexpr (std::is_same_v>) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { C_dlaf_pzhegvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else { - C_dlaf_pzpotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + C_dlaf_pzpotrf(dlaf_uplo, (int) m, local_b_ptr, 1, 1, desc_b, &info); DLAF_ASSERT(info == 0, info); info = -1; @@ -254,12 +254,12 @@ void testGenEigensolver(const blas::Uplo uplo, const SizeType m, const SizeType } } else if constexpr (std::is_same_v>) { - if (factorization == Factorization::do_factorization) { + if constexpr (factorization == Factorization::do_factorization) { C_dlaf_pchegvd(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, local_b_ptr, 1, 1, desc_b, eigenvalues_ptr, local_eigenvectors_ptr, 1, 1, desc_z, &info); } else { - C_dlaf_pcpotrf(dlaf_uplo, (int) m, local_a_ptr, 1, 1, desc_a, &info); + C_dlaf_pcpotrf(dlaf_uplo, (int) m, local_b_ptr, 1, 1, desc_b, &info); DLAF_ASSERT(info == 0, info); info = -1;