Skip to content

Commit

Permalink
Use 64 bit CuSolver API for Eigen decomposition (#349)
Browse files Browse the repository at this point in the history
This PR replace the legacy call to `cusolverDnsyevj` with the 64-bit version.
It also improves the indexing types used, from int to size_t when possible.

Authors:
  - Micka (https://github.com/lowener)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #349
  • Loading branch information
lowener authored Nov 5, 2021
1 parent 091cfdf commit 3848fa7
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 16 deletions.
71 changes: 71 additions & 0 deletions cpp/include/raft/linalg/cusolver_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -719,5 +719,76 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT
}
/** @} */

#if CUDART_VERSION >= 11010
/**
* @defgroup DnXsyevd cusolver DnXsyevd operations
* @{
*/
template <typename T>
cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, const T *A, int64_t lda, const T *W,
size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost,
cudaStream_t stream);

template <>
inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, const float *A, int64_t lda, const float *W,
size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost,
cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd_bufferSize(
handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, CUDA_R_32F, W,
CUDA_R_32F, workspaceInBytesOnDevice, workspaceInBytesOnHost);
}

template <>
inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, const double *A, int64_t lda,
const double *W, size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost, cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd_bufferSize(
handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, CUDA_R_64F, W,
CUDA_R_64F, workspaceInBytesOnDevice, workspaceInBytesOnHost);
}

template <typename T>
cusolverStatus_t cusolverDnxsyevd( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, T *A, int64_t lda, T *W, T *bufferOnDevice,
size_t workspaceInBytesOnDevice, T *bufferOnHost,
size_t workspaceInBytesOnHost, int *info, cudaStream_t stream);

template <>
inline cusolverStatus_t cusolverDnxsyevd( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, float *A, int64_t lda, float *W,
float *bufferOnDevice, size_t workspaceInBytesOnDevice, float *bufferOnHost,
size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_32F, A, lda,
CUDA_R_32F, W, CUDA_R_32F, bufferOnDevice,
workspaceInBytesOnDevice, bufferOnHost,
workspaceInBytesOnHost, info);
}

template <>
inline cusolverStatus_t cusolverDnxsyevd( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, double *A, int64_t lda, double *W,
double *bufferOnDevice, size_t workspaceInBytesOnDevice, double *bufferOnHost,
size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_64F, A, lda,
CUDA_R_64F, W, CUDA_R_64F, bufferOnDevice,
workspaceInBytesOnDevice, bufferOnHost,
workspaceInBytesOnHost, info);
}
/** @} */
#endif

} // namespace linalg
} // namespace raft
74 changes: 58 additions & 16 deletions cpp/include/raft/linalg/eig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,34 @@
namespace raft {
namespace linalg {

template <typename math_t>
void eigDC_legacy(const raft::handle_t &handle, const math_t *in,
std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors,
math_t *eig_vals, cudaStream_t stream) {
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

int lwork;
CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, in,
n_cols, eig_vals, &lwork));

rmm::device_uvector<math_t> d_work(lwork, stream);
rmm::device_scalar<int> d_dev_info(stream);

raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);

CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors,
n_cols, eig_vals, d_work.data(), lwork,
d_dev_info.data(), stream));
CUDA_CHECK(cudaGetLastError());

auto dev_info = d_dev_info.value(stream);
ASSERT(dev_info == 0,
"eig.cuh: eigensolver couldn't converge to a solution. "
"This usually occurs when some of the features do not vary enough.");
}

/**
* @defgroup eig decomp with divide and conquer method for the column-major
* symmetric matrices
Expand All @@ -42,31 +70,43 @@ namespace linalg {
* @{
*/
template <typename math_t>
void eigDC(const raft::handle_t &handle, const math_t *in, int n_rows,
int n_cols, math_t *eig_vectors, math_t *eig_vals,
void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows,
std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals,
cudaStream_t stream) {
#if CUDART_VERSION < 11010
eigDC_legacy(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream);
#else
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

int lwork;
CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, in,
n_cols, eig_vals, &lwork));
cusolverDnParams_t dn_params = nullptr;
CUSOLVER_CHECK(cusolverDnCreateParams(&dn_params));

rmm::device_uvector<math_t> d_work(lwork, stream);
size_t workspaceDevice = 0;
size_t workspaceHost = 0;
CUSOLVER_CHECK(cusolverDnxsyevd_bufferSize(
cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER,
static_cast<int64_t>(n_rows), eig_vectors, static_cast<int64_t>(n_cols),
eig_vals, &workspaceDevice, &workspaceHost, stream));

rmm::device_uvector<math_t> d_work(workspaceDevice / sizeof(math_t), stream);
rmm::device_scalar<int> d_dev_info(stream);
std::vector<math_t> h_work(workspaceHost / sizeof(math_t));

raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);

CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors,
n_cols, eig_vals, d_work.data(), lwork,
d_dev_info.data(), stream));
CUDA_CHECK(cudaGetLastError());
CUSOLVER_CHECK(cusolverDnxsyevd(
cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER,
static_cast<int64_t>(n_rows), eig_vectors, static_cast<int64_t>(n_cols),
eig_vals, d_work.data(), workspaceDevice, h_work.data(), workspaceHost,
d_dev_info.data(), stream));

CUDA_CHECK(cudaGetLastError());
CUSOLVER_CHECK(cusolverDnDestroyParams(dn_params));
int dev_info = d_dev_info.value(stream);
ASSERT(dev_info == 0,
"eig.cuh: eigensolver couldn't converge to a solution. "
"This usually occurs when some of the features do not vary enough.");
#endif
}

enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT };
Expand Down Expand Up @@ -155,15 +195,17 @@ void eigSelDC(const raft::handle_t &handle, math_t *in, int n_rows, int n_cols,
* @{
*/
template <typename math_t>
void eigJacobi(const raft::handle_t &handle, const math_t *in, int n_rows,
int n_cols, math_t *eig_vectors, math_t *eig_vals,
cudaStream_t stream, math_t tol = 1.e-7, int sweeps = 15) {
void eigJacobi(const raft::handle_t &handle, const math_t *in,
std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors,
math_t *eig_vals, cudaStream_t stream, math_t tol = 1.e-7,
std::uint32_t sweeps = 15) {
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

syevjInfo_t syevj_params = nullptr;
CUSOLVER_CHECK(cusolverDnCreateSyevjInfo(&syevj_params));
CUSOLVER_CHECK(cusolverDnXsyevjSetTolerance(syevj_params, tol));
CUSOLVER_CHECK(cusolverDnXsyevjSetMaxSweeps(syevj_params, sweeps));
CUSOLVER_CHECK(
cusolverDnXsyevjSetMaxSweeps(syevj_params, static_cast<int>(sweeps)));

int lwork;
CUSOLVER_CHECK(cusolverDnsyevj_bufferSize(
Expand Down

0 comments on commit 3848fa7

Please sign in to comment.