Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow generalized eigensolver to re-use pre-factorized matrix #1167

Merged
merged 16 commits into from
Jun 21, 2024
369 changes: 296 additions & 73 deletions include/dlaf/eigensolver/gen_eigensolver.h

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions include/dlaf/eigensolver/gen_eigensolver/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
#include <dlaf/types.h>

namespace dlaf::eigensolver::internal {

enum class Factorization { do_factorization, already_factorized };

template <Backend backend, Device device, class T>
struct GenEigensolver {
static void call(blas::Uplo uplo, Matrix<T, device>& mat_a, Matrix<T, device>& mat_b,
Matrix<BaseType<T>, device>& eigenvalues, Matrix<T, device>& eigenvectors);
Matrix<BaseType<T>, device>& eigenvalues, Matrix<T, device>& eigenvectors,
const Factorization factorization);
static void call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix<T, device>& mat_a,
Matrix<T, device>& mat_b, Matrix<BaseType<T>, device>& eigenvalues,
Matrix<T, device>& eigenvectors);
Matrix<T, device>& eigenvectors, const Factorization factorization);
};

// ETI
Expand Down
15 changes: 11 additions & 4 deletions include/dlaf/eigensolver/gen_eigensolver/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,17 @@
#include <dlaf/solver/triangular.h>
#include <dlaf/util_matrix.h>

#include "api.h"

namespace dlaf::eigensolver::internal {

template <Backend B, Device D, class T>
void GenEigensolver<B, D, T>::call(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b,
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors) {
cholesky_factorization<B>(uplo, mat_b);
Matrix<BaseType<T>, D>& eigenvalues, Matrix<T, D>& eigenvectors,
const Factorization factorization) {
if (factorization == Factorization::do_factorization) {
cholesky_factorization<B>(uplo, mat_b);
}
generalized_to_standard<B>(uplo, mat_a, mat_b);

hermitian_eigensolver<B>(uplo, mat_a, eigenvalues, eigenvectors);
Expand All @@ -34,8 +39,10 @@ void GenEigensolver<B, D, T>::call(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<
template <Backend B, Device D, class T>
void GenEigensolver<B, D, T>::call(comm::CommunicatorGrid& grid, blas::Uplo uplo, Matrix<T, D>& mat_a,
Matrix<T, D>& mat_b, Matrix<BaseType<T>, D>& eigenvalues,
Matrix<T, D>& eigenvectors) {
cholesky_factorization<B>(grid, uplo, mat_b);
Matrix<T, D>& eigenvectors, const Factorization factorization) {
if (factorization == Factorization::do_factorization) {
cholesky_factorization<B>(grid, uplo, mat_b);
}
generalized_to_standard<B>(grid, uplo, mat_a, mat_b);

hermitian_eigensolver<B>(grid, uplo, mat_a, eigenvalues, eigenvectors);
Expand Down
3 changes: 2 additions & 1 deletion include/dlaf/eigensolver/gen_to_std.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ namespace dlaf::eigensolver::internal {
/// @pre @p mat_a has blocksize (NB x NB)
/// @pre @p mat_a has tilesize (NB x NB)
///
/// @param mat_b contains the triangular matrix. It can be lower (L) or upper (U). Only the tiles of
/// @param mat_b contains the Cholesky factorisation of the Hermitian positive definite matrix B
/// The triangular matrix can be lower (L) or upper (U). Only the tiles of
/// the matrix which contain the lower triangular or the upper triangular part are accessed.
/// Note: B should be modifiable as the diagonal tiles might be temporarily modified during the calculation.
/// @pre @p mat_b is not distributed
Expand Down
114 changes: 114 additions & 0 deletions include/dlaf_c/eigensolver/gen_eigensolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,52 @@ DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_z(
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// Generalized eigensolver
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky
/// factorization
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param dlaf_context context associated to the DLA-Future grid created with @ref dlaf_create_grid
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param dlaf_desca DLA-Future descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$
/// @param dlaf_descb DLA-Future descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param dlaf_descz DLA-Future descriptor of the global matrix \f$\mathbf{Z}\f$
/// @return 0 if the eigensolver completed normally
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_factorized_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

/// @copydoc dlaf_symmetric_generalized_eigensolver_factorized_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_factorized_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) DLAF_NOEXCEPT;

#ifdef DLAF_WITH_SCALAPACK

/// Generalized eigensolver
Expand Down Expand Up @@ -121,4 +167,72 @@ DLAF_EXTERN_C void dlaf_pzhegvd(const char uplo, const int n, dlaf_complex_z* a,
const int jb, const int descb[9], double* w, dlaf_complex_z* z,
const int iz, const int jz, const int descz[9], int* info) DLAF_NOEXCEPT;

/// Generalized eigensolver
///
/// @remark This function is only available when DLAF_WITH_SCALAPACK=ON.
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre The matrix \f$\mathbf{B}\f$ is assumed to be factorized; it is the result of a Cholesky
/// factorization
///
/// @pre Submatrices are currently not supported, so @p n is the size of the full matrices
/// \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ and @p ia, @p ja, @p ib, @p jb, @p iz, and
/// @p jz need to be 1.
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param n order of the sumbatrix \f$\mathbf{A}\f$ used in the computation
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param ia row index of the global matrix \f$\mathbf{A}\f$ identifying the first row of the submatrix
/// $A$, has to be 1
/// @param ja column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param desca ScaLAPACK array descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the Cholesky factorization of the global matrix \f$\mathbf{B}\f$
/// @param ib row index of the global matrix \f$\mathbf{B}\f$ identifying the first row of the submatrix
/// \f$\mathbf{B}\f$, has to be 1
/// @param jb column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{B}\f$, has to be 1
/// @param descb ScaLAPACK array descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param iz row index of the global matrix \f$\mathbf{Z}\f$ identifying the first row of the submatrix
/// \f$\mathbf{Z}\f$, has to be 1
/// @param jz column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$
/// @param[out] info 0 if the eigensolver completed normally
DLAF_EXTERN_C void dlaf_pssygvd_factorized(const char uplo, const int n, float* a, const int ia,
const int ja, const int desca[9], float* b, const int ib,
const int jb, const int descb[9], float* w, float* z,
const int iz, const int jz, const int descz[9],
int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pdsygvd_factorized(const char uplo, const int n, double* a, const int ia,
const int ja, const int desca[9], double* b, const int ib,
const int jb, const int descb[9], double* w, double* z,
const int iz, const int jz, const int descz[9],
int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pchegvd_factorized(const char uplo, const int n, dlaf_complex_c* a, const int ia,
const int ja, const int desca[9], dlaf_complex_c* b,
const int ib, const int jb, const int descb[9], float* w,
dlaf_complex_c* z, const int iz, const int jz,
const int descz[9], int* info) DLAF_NOEXCEPT;

/// @copydoc dlaf_pssygvx_factorized
DLAF_EXTERN_C void dlaf_pzhegvd_factorized(const char uplo, const int n, dlaf_complex_z* a, const int ia,
const int ja, const int desca[9], dlaf_complex_z* b,
const int ib, const int jb, const int descb[9], double* w,
dlaf_complex_z* z, const int iz, const int jz,
const int descz[9], int* info) DLAF_NOEXCEPT;

#endif
64 changes: 64 additions & 0 deletions src/c_api/eigensolver/gen_eigensolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,40 @@ int dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const char
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_factorized_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<float>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_factorized_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<double>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_factorized_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<std::complex<float>>(dlaf_context, uplo, a,
dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_factorized_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) noexcept {
return hermitian_generalized_eigensolver_factorized<std::complex<double>>(dlaf_context, uplo, a,
dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

#ifdef DLAF_WITH_SCALAPACK

void dlaf_pssygvd(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9],
Expand Down Expand Up @@ -81,4 +115,34 @@ void dlaf_pzhegvd(const char uplo, const int m, dlaf_complex_z* a, const int ia,
pxhegvd<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
RMeli marked this conversation as resolved.
Show resolved Hide resolved
}

void dlaf_pssygvd_factorized(const char uplo, const int m, float* a, const int ia, const int ja,
const int desca[9], float* b, const int ib, const int jb,
const int descb[9], float* w, float* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvd_factorized<float>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pdsygvd_factorized(const char uplo, const int m, double* a, const int ia, const int ja,
const int desca[9], double* b, const int ib, const int jb,
const int descb[9], double* w, double* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvd_factorized<double>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pchegvd_factorized(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja,
const int desca[9], dlaf_complex_c* b, const int ib, const int jb,
const int descb[9], float* w, dlaf_complex_c* z, const int iz, const int jz,
const int descz[9], int* info) noexcept {
pxhegvd_factorized<std::complex<float>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz,
descz, *info);
}

void dlaf_pzhegvd_factorized(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja,
const int desca[9], dlaf_complex_z* b, const int ib, const int jb,
const int descb[9], double* w, dlaf_complex_z* z, const int iz,
const int jz, const int descz[9], int* info) noexcept {
pxhegvd_factorized<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz,
descz, *info);
}

#endif
Loading
Loading