Skip to content

Commit

Permalink
C and ScaLAPACK-like API for generalised eigensolver (#992)
Browse files Browse the repository at this point in the history
  • Loading branch information
RMeli authored Sep 28, 2023
1 parent f0b8277 commit b0a5525
Show file tree
Hide file tree
Showing 12 changed files with 825 additions and 67 deletions.
2 changes: 1 addition & 1 deletion include/dlaf_c/eigensolver/eigensolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ DLAF_EXTERN_C int dlaf_hermitian_eigensolver_z(
/// @param n order of the sumbatrix \f$\mathbf{A}\f$ used in the computation
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param ia row index of the global matrix \f$\mathbf{A}\f$ identifying the first row of the submatrix
/// $A$, has to be 1
/// \f$\mathbf{A}\f$, has to be 1
/// @param ja column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param desca ScaLAPACK array descriptor of the global matrix \f$\mathbf{A}\f$
Expand Down
124 changes: 124 additions & 0 deletions include/dlaf_c/eigensolver/gen_eigensolver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
//
// Distributed Linear Algebra with Future (DLAF)
//
// Copyright (c) 2018-2023, ETH Zurich
// All rights reserved.
//
// Please, refer to the LICENSE file in the root directory.
// SPDX-License-Identifier: BSD-3-Clause
//

#pragma once

/// @file

#include <dlaf_c/desc.h>
#include <dlaf_c/utils.h>

/// Generalized eigensolver
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param dlaf_context context associated to the DLA-Future grid created with @ref dlaf_create_grid
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param dlaf_desca DLA-Future descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the global matrix \f$\mathbf{B}\f$
/// @param dlaf_descb DLA-Future descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param dlaf_descz DLA-Future descriptor of the global matrix \f$\mathbf{Z}\f$
/// @return 0 if the eigensolver completed normally
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_s(
const int dlaf_context, const char uplo, float* a, const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz);

/// @copydoc dlaf_symmetric_generalized_eigensolver_s
DLAF_EXTERN_C int dlaf_symmetric_generalized_eigensolver_d(
const int dlaf_context, const char uplo, double* a, const struct DLAF_descriptor dlaf_desca,
double* b, const struct DLAF_descriptor dlaf_descb, double* w, double* z,
const struct DLAF_descriptor dlaf_descz);

/// @copydoc dlaf_symmetric_generalized_eigensolver_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_c(
const int dlaf_context, const char uplo, dlaf_complex_c* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_c* b, const struct DLAF_descriptor dlaf_descb, float* w, dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz);

/// @copydoc dlaf_symmetric_generalized_eigensolver_s
DLAF_EXTERN_C int dlaf_hermitian_generalized_eigensolver_z(
const int dlaf_context, const char uplo, dlaf_complex_z* a, const struct DLAF_descriptor dlaf_desca,
dlaf_complex_z* b, const struct DLAF_descriptor dlaf_descb, double* w, dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz);

#ifdef DLAF_WITH_SCALAPACK

/// Generalized eigensolver
///
/// @remark This function is only available when DLAF_WITH_SCALAPACK=ON.
///
/// @pre The matrices \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ are assumed to be
/// distributed and in host memory. The vector of eigenvalues \f$\mathbf{w}\f$ is assumed to be local
/// (non-distributed) and in host memory. Moving to and from GPU memory is handled internally.
///
/// @pre Submatrices are currently not supported, so @p n is the size of the full matrices
/// \f$\mathbf{A}\f$, \f$\mathbf{B}\f$, and \f$\mathbf{Z}\f$ and @p ia, @p ja, @p ib, @p jb, @p iz, and
/// @p jz need to be 1.
///
/// @post The pika runtime is resumed when this function is called and suspended when the call
/// terminates.
///
/// @param uplo indicates whether the upper ('U') or lower ('L') triangular part of the global submatrix
/// \f$\mathbf{A}\f$ is referenced
/// @param n order of the sumbatrix \f$\mathbf{A}\f$ used in the computation
/// @param a Local part of the global matrix \f$\mathbf{A}\f$
/// @param ia row index of the global matrix \f$\mathbf{A}\f$ identifying the first row of the submatrix
/// $A$, has to be 1
/// @param ja column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param desca ScaLAPACK array descriptor of the global matrix \f$\mathbf{A}\f$
/// @param b Local part of the global matrix \f$\mathbf{B}\f$
/// @param ib row index of the global matrix \f$\mathbf{B}\f$ identifying the first row of the submatrix
/// \f$\mathbf{B}\f$, has to be 1
/// @param jb column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{B}\f$, has to be 1
/// @param descb ScaLAPACK array descriptor of the global matrix \f$\mathbf{B}\f$
/// @param w Local vector of eigenvalues (non-distributed)
/// @param z Local part of the global matrix \f$\mathbf{Z}\f$
/// @param iz row index of the global matrix \f$\mathbf{Z}\f$ identifying the first row of the submatrix
/// \f$\mathbf{Z}\f$, has to be 1
/// @param jz column index of the global matrix \f$\mathbf{A}\f$ identifying the first column of the
/// submatrix \f$\mathbf{A}\f$, has to be 1
/// @param descz ScaLAPACK array descriptor of the global matrix \f$\mathbf{Z}\f$
/// @param[out] info 0 if the eigensolver completed normally
DLAF_EXTERN_C void dlaf_pssygvx(const char uplo, const int n, float* a, const int ia, const int ja,
const int desca[9], float* b, const int ib, const int jb,
const int descb[9], float* w, float* z, const int iz, const int jz,
const int descz[9], int* info);

/// @copydoc dlaf_pssygvx
DLAF_EXTERN_C void dlaf_pdsygvx(const char uplo, const int n, double* a, const int ia, const int ja,
const int desca[9], double* b, const int ib, const int jb,
const int descb[9], double* w, double* z, const int iz, const int jz,
const int descz[9], int* info);

/// @copydoc dlaf_pssygvx
DLAF_EXTERN_C void dlaf_pchegvx(const char uplo, const int n, dlaf_complex_c* a, const int ia,
const int ja, const int desca[9], dlaf_complex_c* b, const int ib,
const int jb, const int descb[9], float* w, dlaf_complex_c* z,
const int iz, const int jz, const int descz[9], int* info);

/// @copydoc dlaf_pssygvx
DLAF_EXTERN_C void dlaf_pzhegvx(const char uplo, const int n, dlaf_complex_z* a, const int ia,
const int ja, const int desca[9], dlaf_complex_z* b, const int ib,
const int jb, const int descb[9], double* w, dlaf_complex_z* z,
const int iz, const int jz, const int descz[9], int* info);

#endif
5 changes: 3 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,9 @@ DLAF_addSublibrary(

# Define DLAF's C API library
DLAF_addSublibrary(
c_api SOURCES c_api/eigensolver/eigensolver.cpp c_api/factorization/cholesky.cpp c_api/grid.cpp
c_api/init.cpp c_api/utils.cpp LIBRARIES dlaf.core dlaf.factorization dlaf.eigensolver
c_api SOURCES c_api/eigensolver/eigensolver.cpp c_api/eigensolver/gen_eigensolver.cpp
c_api/factorization/cholesky.cpp c_api/grid.cpp c_api/init.cpp c_api/utils.cpp
LIBRARIES dlaf.core dlaf.factorization dlaf.eigensolver
)

# Define DLAF's complete library
Expand Down
83 changes: 83 additions & 0 deletions src/c_api/eigensolver/gen_eigensolver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
//
// Distributed Linear Algebra with Future (DLAF)
//
// Copyright (c) 2018-2023, ETH Zurich
// All rights reserved.
//
// Please, refer to the LICENSE file in the root directory.
// SPDX-License-Identifier: BSD-3-Clause
//

#include "gen_eigensolver.h"

#include <complex>

#include <dlaf_c/eigensolver/gen_eigensolver.h>
#include <dlaf_c/init.h>
#include <dlaf_c/utils.h>

#include "dlaf_c/desc.h"

int dlaf_symmetric_generalized_eigensolver_s(const int dlaf_context, const char uplo, float* a,
const struct DLAF_descriptor dlaf_desca, float* b,
const struct DLAF_descriptor dlaf_descb, float* w, float* z,
const struct DLAF_descriptor dlaf_descz) {
return hermitian_generalized_eigensolver<float>(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w, z,
dlaf_descz);
}

int dlaf_symmetric_generalized_eigensolver_d(const int dlaf_context, const char uplo, double* a,
const struct DLAF_descriptor dlaf_desca, double* b,
const struct DLAF_descriptor dlaf_descb, double* w,
double* z, const struct DLAF_descriptor dlaf_descz) {
return hermitian_generalized_eigensolver<double>(dlaf_context, uplo, a, dlaf_desca, b, dlaf_descb, w,
z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_c(const int dlaf_context, const char uplo, dlaf_complex_c* a,
const struct DLAF_descriptor dlaf_desca, dlaf_complex_c* b,
const struct DLAF_descriptor dlaf_descb, float* w,
dlaf_complex_c* z,
const struct DLAF_descriptor dlaf_descz) {
return hermitian_generalized_eigensolver<std::complex<float>>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

int dlaf_hermitian_generalized_eigensolver_z(const int dlaf_context, const char uplo, dlaf_complex_z* a,
const struct DLAF_descriptor dlaf_desca, dlaf_complex_z* b,
const struct DLAF_descriptor dlaf_descb, double* w,
dlaf_complex_z* z,
const struct DLAF_descriptor dlaf_descz) {
return hermitian_generalized_eigensolver<std::complex<double>>(dlaf_context, uplo, a, dlaf_desca, b,
dlaf_descb, w, z, dlaf_descz);
}

#ifdef DLAF_WITH_SCALAPACK

void dlaf_pssygvx(const char uplo, const int m, float* a, const int ia, const int ja, const int desca[9],
float* b, const int ib, const int jb, const int descb[9], float* w, float* z,
const int iz, const int jz, const int descz[9], int* info) {
pxhegvx<float>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pdsygvx(const char uplo, const int m, double* a, const int ia, const int ja,
const int desca[9], double* b, const int ib, const int jb, const int descb[9],
double* w, double* z, const int iz, const int jz, const int descz[9], int* info) {
pxhegvx<double>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pchegvx(const char uplo, const int m, dlaf_complex_c* a, const int ia, const int ja,
const int desca[9], dlaf_complex_c* b, const int ib, const int jb, const int descb[9],
float* w, dlaf_complex_c* z, const int iz, const int jz, const int descz[9],
int* info) {
pxhegvx<std::complex<float>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

void dlaf_pzhegvx(const char uplo, const int m, dlaf_complex_z* a, const int ia, const int ja,
const int desca[9], dlaf_complex_z* b, const int ib, const int jb, const int descb[9],
double* w, dlaf_complex_z* z, const int iz, const int jz, const int descz[9],
int* info) {
pxhegvx<std::complex<double>>(uplo, m, a, ia, ja, desca, b, ib, jb, descb, w, z, iz, jz, descz, *info);
}

#endif
108 changes: 108 additions & 0 deletions src/c_api/eigensolver/gen_eigensolver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//
// Distributed Linear Algebra with Future (DLAF)
//
// Copyright (c) 2018-2023, ETH Zurich
// All rights reserved.
//
// Please, refer to the LICENSE file in the root directory.
// SPDX-License-Identifier: BSD-3-Clause
//

#pragma once

#include <blas.hh>
#include <mpi.h>

#include <pika/init.hpp>

#include <dlaf/common/assert.h>
#include <dlaf/eigensolver/gen_eigensolver.h>
#include <dlaf/matrix/matrix.h>
#include <dlaf/matrix/matrix_mirror.h>
#include <dlaf/types.h>
#include <dlaf_c/desc.h>
#include <dlaf_c/grid.h>

#include "../blacs.h"
#include "../grid.h"
#include "../utils.h"

template <typename T>
int hermitian_generalized_eigensolver(const int dlaf_context, const char uplo, T* a,
const DLAF_descriptor dlaf_desca, T* b,
const DLAF_descriptor dlaf_descb, dlaf::BaseType<T>* w, T* z,
const DLAF_descriptor dlaf_descz) {
using MatrixHost = dlaf::matrix::Matrix<T, dlaf::Device::CPU>;
using MatrixMirror = dlaf::matrix::MatrixMirror<T, dlaf::Device::Default, dlaf::Device::CPU>;
using MatrixBaseMirror =
dlaf::matrix::MatrixMirror<dlaf::BaseType<T>, dlaf::Device::Default, dlaf::Device::CPU>;

DLAF_ASSERT(dlaf_desca.i == 0, dlaf_desca.i);
DLAF_ASSERT(dlaf_desca.j == 0, dlaf_desca.j);
DLAF_ASSERT(dlaf_descb.i == 0, dlaf_descb.i);
DLAF_ASSERT(dlaf_descb.j == 0, dlaf_descb.j);
DLAF_ASSERT(dlaf_descz.i == 0, dlaf_descz.i);
DLAF_ASSERT(dlaf_descz.j == 0, dlaf_descz.j);

pika::resume();

auto communicator_grid = dlaf_grids.at(dlaf_context);

auto [distribution_a, layout_a] = distribution_and_layout(dlaf_desca, communicator_grid);
auto [distribution_b, layout_b] = distribution_and_layout(dlaf_descb, communicator_grid);
auto [distribution_z, layout_z] = distribution_and_layout(dlaf_descz, communicator_grid);

MatrixHost matrix_host_a(distribution_a, layout_a, a);
MatrixHost matrix_host_b(distribution_b, layout_b, b);
MatrixHost eigenvectors_host(distribution_z, layout_z, z);
auto eigenvalues_host =
dlaf::matrix::createMatrixFromColMajor<dlaf::Device::CPU>({dlaf_descz.m, 1}, {dlaf_descz.mb, 1},
std::max(dlaf_descz.m, 1), w);

{
MatrixMirror matrix_a(matrix_host_a);
MatrixMirror matrix_b(matrix_host_b);
MatrixMirror eigenvectors(eigenvectors_host);
MatrixBaseMirror eigenvalues(eigenvalues_host);

dlaf::hermitian_generalized_eigensolver<dlaf::Backend::Default, dlaf::Device::Default, T>(
communicator_grid, blas::char2uplo(uplo), matrix_a.get(), matrix_b.get(), eigenvalues.get(),
eigenvectors.get());
} // Destroy mirror

// Ensure data is copied back to the host
eigenvalues_host.waitLocalTiles();
eigenvectors_host.waitLocalTiles();

pika::suspend();
return 0;
}

#ifdef DLAF_WITH_SCALAPACK

template <typename T>
void pxhegvx(const char uplo, const int m, T* a, const int ia, const int ja, const int desca[9], T* b,
const int ib, const int jb, const int descb[9], dlaf::BaseType<T>* w, T* z, const int iz,
int jz, const int descz[9], int& info) {
DLAF_ASSERT(desca[0] == 1, desca[0]);
DLAF_ASSERT(descb[0] == 1, descb[0]);
DLAF_ASSERT(descz[0] == 1, descz[0]);
DLAF_ASSERT(desca[1] == descb[1], desca[1], descb[1]);
DLAF_ASSERT(desca[1] == descz[1], desca[1], descz[1]);
DLAF_ASSERT(ia == 1, ia);
DLAF_ASSERT(ja == 1, ja);
DLAF_ASSERT(ib == 1, ib);
DLAF_ASSERT(jb == 1, jb);
DLAF_ASSERT(iz == 1, iz);
DLAF_ASSERT(iz == 1, iz);

auto dlaf_desca = make_dlaf_descriptor(m, m, ia, ja, desca);
auto dlaf_descb = make_dlaf_descriptor(m, m, ib, jb, descb);
auto dlaf_descz = make_dlaf_descriptor(m, m, iz, jz, descz);

auto _info =
hermitian_generalized_eigensolver(desca[1], uplo, a, dlaf_desca, b, dlaf_descb, w, z, dlaf_descz);
info = _info;
}

#endif
Loading

0 comments on commit b0a5525

Please sign in to comment.