Skip to content

Commit

Permalink
Allow generalized eigensolver to re-use pre-factorized matrix (#1167)
Browse files Browse the repository at this point in the history
  • Loading branch information
RMeli authored Jun 21, 2024
1 parent ce42f63 commit ab48f04
Show file tree
Hide file tree
Showing 12 changed files with 846 additions and 141 deletions.
369 changes: 296 additions & 73 deletions include/dlaf/eigensolver/gen_eigensolver.h

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions include/dlaf/eigensolver/gen_eigensolver/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
#include <dlaf/types.h>

namespace dlaf::eigensolver::internal {

enum class Factorization { do_factorization, already_factorized };

template <Backend backend, Device device, class T>
struct GenEigensolver {
static void call(blas::Uplo uplo, Matrix<T, device>& mat_a, Matrix<T, device>& mat_b,
Matrix<BaseType<T>, device>& eigenvalues, Matrix<T, device>& eigenvectors);
Matrix<BaseType<T>, device>& eigenvalues, Matrix<T, device>& eigenvectors,
const Factorization factorization);
static void call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix<T, device>& mat_a,
Matrix<T, device>& mat_b, Matrix<BaseType<T>, device>& eigenvalues,
Matrix<T, device>& eigenvectors);
Matrix<T, device>& eigenvectors, const Factorization factorization);
};

// ETI
Expand Down
15 changes: 11 additions & 4 deletions include/dlaf/eigensolver/gen_eigensolver/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,17 @@
#include <dlaf/solver/triangular.h>
#include <dlaf/util_matrix.h>

#include "api.h"

namespace dlaf::eigensolver::internal {

template <Backend B, Device D, class T>
void GenEigensolver<B, D, T>::call(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b,
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors) {
cholesky_factorization<B>(uplo, mat_b);
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors,
const Factorization factorization) {
if (factorization == Factorization::do_factorization) {
cholesky_factorization<B>(uplo, mat_b);
}
generalized_to_standard<B>(uplo, mat_a, mat_b);

hermitian_eigensolver<B>(uplo, mat_a, eigenvalues, eigenvectors);
Expand All @@ -34,8 +39,10 @@ void GenEigensolver<B, D, T>::call(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<
template <Backend B, Device D, class T>
void GenEigensolver<B, D, T>::call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix<T, D>& mat_a,
Matrix<T, D>& mat_b, Matrix<BaseType<T>, D>& eigenvalues,
Matrix<T, D>& eigenvectors) {
cholesky_factorization<B>(grid, uplo, mat_b);
Matrix<T, D>& eigenvectors, const Factorization factorization) {
if (factorization == Factorization::do_factorization) {
cholesky_factorization<B>(grid, uplo, mat_b);
}
generalized_to_standard<B>(grid, uplo, mat_a, mat_b);

hermitian_eigensolver<B>(grid, uplo, mat_a, eigenvalues, eigenvectors);
Expand Down
3 changes: 2 additions & 1 deletion include/dlaf/eigensolver/gen_to_std.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ namespace dlaf::eigensolver::internal {
/// @pre @p mat_a has blocksize (NB x NB)
/// @pre @p mat_a has tilesize (NB x NB)
///
/// @param mat_b contains the triangular matrix. It can be lower (L) or upper (U). Only the tiles of
/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B
/// The triangular matrix can be lower (L) or upper (U). Only the tiles of
/// the matrix which contain the lower triangular or the upper triangular part are accessed.
/// Note: B should be modifiable as the diagonal tiles might be temporarily modified during the calculation.
/// @pre @p mat_b is not distributed
Expand Down
114 changes: 114 additions & 0 deletions include/dlaf_c/eigensolver/gen_eigensolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,52 @@ DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_z(
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// Generalized eigensolver
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky
/// factorization
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param dlaf_context context associated to the DLA-Future grid created with @ref dlaf_create_grid
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param dlaf_desca DLA-Future descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$
/// @param dlaf_descb DLA-Future descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param dlaf_descz DLA-Future descriptor of the global matrix \f$\mathbf{Z}\f$
/// @return 0 if the eigensolver completed normally
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

#ifdef DLAF_WITH_SCALAPACK

/// Generalized eigensolver
Expand Down Expand Up @@ -121,4 +167,72 @@ DLAF_EXTERN_C void dlaf_pzhegvd(const char uplo, const int n, dlaf_complex_z* a,
const int jb, const int descb[9], double* w, dlaf_complex_z* z,
const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT;

/// Generalized eigensolver
///
/// @remark This function is only available when DLAF_WITH_SCALAPACK=ON.
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky
/// factorization
///
/// @pre Submatrices are currently not supported, so @p n is the size of the full matrices
/// \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ and @p ia, @p ja, @p ib, @p jb, @p iz, and
/// @p jz need to be 1.
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param n order of the sumbatrix \f$\mathbf{A}\f$ used in the computation
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param ia row index of the global matrix \f$\mathbf{A}\f$ identifying the first row of the submatrix
/// $A$, has to be 1
/// @param ja column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param desca ScaLAPACK array descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$
/// @param ib row index of the global matrix \f$\mathbf{B}\f$ identifying the first row of the submatrix
/// \f$\mathbf{B}\f$, has to be 1
/// @param jb column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{B}\f$, has to be 1
/// @param descb ScaLAPACK array descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param iz row index of the global matrix \f$\mathbf{Z}\f$ identifying the first row of the submatrix
/// \f$\mathbf{Z}\f$, has to be 1
/// @param jz column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$
/// @param[out] info 0 if the eigensolver completed normally
DLAF_EXTERN_C void dlaf_pssygvd_factorized(const char uplo, const int n, float* a, const int ia,
const int ja, const int desca[9], float* b, const int ib,
const int jb, const int descb[9], float* w, float* z,
const int iz, const int jz, const int descz[9],
int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pdsygvd_factorized(const char uplo, const int n, double* a, const int ia,
const int ja, const int desca[9], double* b, const int ib,
const int jb, const int descb[9], double* w, double* z,
const int iz, const int jz, const int descz[9],
int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pchegvd_factorized(const char uplo, const int n, dlaf_complex_c* a, const int ia,
const int ja, const int desca[9], dlaf_complex_c* b,
const int ib, const int jb, const int descb[9], float* w,
dlaf_complex_c* z, const int iz, const int jz,
const int descz[9], int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pzhegvd_factorized(const char uplo, const int n, dlaf_complex_z* a, const int ia,
const int ja, const int desca[9], dlaf_complex_z* b,
const int ib, const int jb, const int descb[9], double* w,
dlaf_complex_z* z, const int iz, const int jz,
const int descz[9], int* info) DLAF_NOEXCEPT;

#endif
64 changes: 64 additions & 0 deletions src/c_api/eigensolver/gen_eigensolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,40 @@ int dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const char
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_factorized_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<float>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_factorized_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<double>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_factorized_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<std::complex<float>>(dlaf_context, uplo, a,
dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_factorized_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<std::complex<double>>(dlaf_context, uplo, a,
dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

#ifdef DLAF_WITH_SCALAPACK

void dlaf_pssygvd(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9],
Expand Down Expand Up @@ -81,4 +115,34 @@ void dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia,
pxhegvd<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pssygvd_factorized(const char uplo, const int m, float* a, const int ia, const int ja,
const int desca[9], float* b, const int ib, const int jb,
const int descb[9], float* w, float* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvd_factorized<float>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pdsygvd_factorized(const char uplo, const int m, double* a, const int ia, const int ja,
const int desca[9], double* b, const int ib, const int jb,
const int descb[9], double* w, double* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvd_factorized<double>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pchegvd_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja,
const int desca[9], dlaf_complex_c* b, const int ib, const int jb,
const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvd_factorized<std::complex<float>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz,
descz, *info);
}

void dlaf_pzhegvd_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja,
const int desca[9], dlaf_complex_z* b, const int ib, const int jb,
const int descb[9], double* w, dlaf_complex_z* z, const int iz,
const int jz, const int descz[9], int* info) noexcept {
pxhegvd_factorized<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz,
descz, *info);
}

#endif
Loading

0 comments on commit ab48f04

Please sign in to comment.