Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 64 bit CuSolver API for Eigen decomposition #349

Merged
merged 10 commits into from
Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions cpp/include/raft/linalg/cusolver_wrappers.h
Original file line number Diff line number Diff line change
Expand Up @@ -719,5 +719,76 @@ inline cusolverStatus_t cusolverSpcsrqrsvBatched( // NOLINT
}
/** @} */

#if CUDART_VERSION >= 11010
/**
* @defgroup DnXsyevd cusolver DnXsyevd operations
* @{
*/
template <typename T>
cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, const T *A, int64_t lda, const T *W,
size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost,
cudaStream_t stream);

template <>
inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, const float *A, int64_t lda, const float *W,
size_t *workspaceInBytesOnDevice, size_t *workspaceInBytesOnHost,
cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd_bufferSize(
handle, params, jobz, uplo, n, CUDA_R_32F, A, lda, CUDA_R_32F, W,
CUDA_R_32F, workspaceInBytesOnDevice, workspaceInBytesOnHost);
}

template <>
inline cusolverStatus_t cusolverDnxsyevd_bufferSize( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, const double *A, int64_t lda,
const double *W, size_t *workspaceInBytesOnDevice,
size_t *workspaceInBytesOnHost, cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd_bufferSize(
handle, params, jobz, uplo, n, CUDA_R_64F, A, lda, CUDA_R_64F, W,
CUDA_R_64F, workspaceInBytesOnDevice, workspaceInBytesOnHost);
}

template <typename T>
cusolverStatus_t cusolverDnxsyevd( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, T *A, int64_t lda, T *W, T *bufferOnDevice,
size_t workspaceInBytesOnDevice, T *bufferOnHost,
size_t workspaceInBytesOnHost, int *info, cudaStream_t stream);

template <>
inline cusolverStatus_t cusolverDnxsyevd( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, float *A, int64_t lda, float *W,
float *bufferOnDevice, size_t workspaceInBytesOnDevice, float *bufferOnHost,
size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_32F, A, lda,
CUDA_R_32F, W, CUDA_R_32F, bufferOnDevice,
workspaceInBytesOnDevice, bufferOnHost,
workspaceInBytesOnHost, info);
}

template <>
inline cusolverStatus_t cusolverDnxsyevd( // NOLINT
cusolverDnHandle_t handle, cusolverDnParams_t params, cusolverEigMode_t jobz,
cublasFillMode_t uplo, int64_t n, double *A, int64_t lda, double *W,
double *bufferOnDevice, size_t workspaceInBytesOnDevice, double *bufferOnHost,
size_t workspaceInBytesOnHost, int *info, cudaStream_t stream) {
CUSOLVER_CHECK(cusolverDnSetStream(handle, stream));
return cusolverDnXsyevd(handle, params, jobz, uplo, n, CUDA_R_64F, A, lda,
CUDA_R_64F, W, CUDA_R_64F, bufferOnDevice,
workspaceInBytesOnDevice, bufferOnHost,
workspaceInBytesOnHost, info);
}
/** @} */
#endif

} // namespace linalg
} // namespace raft
74 changes: 58 additions & 16 deletions cpp/include/raft/linalg/eig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,34 @@
namespace raft {
namespace linalg {

template <typename math_t>
void eigDC_legacy(const raft::handle_t &handle, const math_t *in,
std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors,
math_t *eig_vals, cudaStream_t stream) {
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

int lwork;
CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, in,
n_cols, eig_vals, &lwork));

rmm::device_uvector<math_t> d_work(lwork, stream);
rmm::device_scalar<int> d_dev_info(stream);

raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);

CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors,
n_cols, eig_vals, d_work.data(), lwork,
d_dev_info.data(), stream));
CUDA_CHECK(cudaGetLastError());

auto dev_info = d_dev_info.value(stream);
ASSERT(dev_info == 0,
"eig.cuh: eigensolver couldn't converge to a solution. "
"This usually occurs when some of the features do not vary enough.");
}

/**
* @defgroup eig decomp with divide and conquer method for the column-major
* symmetric matrices
Expand All @@ -42,31 +70,43 @@ namespace linalg {
* @{
*/
template <typename math_t>
void eigDC(const raft::handle_t &handle, const math_t *in, int n_rows,
int n_cols, math_t *eig_vectors, math_t *eig_vals,
void eigDC(const raft::handle_t &handle, const math_t *in, std::size_t n_rows,
std::size_t n_cols, math_t *eig_vectors, math_t *eig_vals,
cudaStream_t stream) {
#if CUDART_VERSION < 11010
eigDC_legacy(handle, in, n_rows, n_cols, eig_vectors, eig_vals, stream);
#else
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

int lwork;
CUSOLVER_CHECK(cusolverDnsyevd_bufferSize(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, in,
n_cols, eig_vals, &lwork));
cusolverDnParams_t dn_params = nullptr;
CUSOLVER_CHECK(cusolverDnCreateParams(&dn_params));

rmm::device_uvector<math_t> d_work(lwork, stream);
size_t workspaceDevice = 0;
size_t workspaceHost = 0;
CUSOLVER_CHECK(cusolverDnxsyevd_bufferSize(
cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER,
static_cast<int64_t>(n_rows), eig_vectors, static_cast<int64_t>(n_cols),
eig_vals, &workspaceDevice, &workspaceHost, stream));

rmm::device_uvector<math_t> d_work(workspaceDevice / sizeof(math_t), stream);
rmm::device_scalar<int> d_dev_info(stream);
std::vector<math_t> h_work(workspaceHost / sizeof(math_t));

raft::matrix::copy(in, eig_vectors, n_rows, n_cols, stream);

CUSOLVER_CHECK(cusolverDnsyevd(cusolverH, CUSOLVER_EIG_MODE_VECTOR,
CUBLAS_FILL_MODE_UPPER, n_rows, eig_vectors,
n_cols, eig_vals, d_work.data(), lwork,
d_dev_info.data(), stream));
CUDA_CHECK(cudaGetLastError());
CUSOLVER_CHECK(cusolverDnxsyevd(
cusolverH, dn_params, CUSOLVER_EIG_MODE_VECTOR, CUBLAS_FILL_MODE_UPPER,
static_cast<int64_t>(n_rows), eig_vectors, static_cast<int64_t>(n_cols),
eig_vals, d_work.data(), workspaceDevice, h_work.data(), workspaceHost,
d_dev_info.data(), stream));

CUDA_CHECK(cudaGetLastError());
CUSOLVER_CHECK(cusolverDnDestroyParams(dn_params));
int dev_info = d_dev_info.value(stream);
ASSERT(dev_info == 0,
"eig.cuh: eigensolver couldn't converge to a solution. "
"This usually occurs when some of the features do not vary enough.");
#endif
}

enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT };
Expand Down Expand Up @@ -155,15 +195,17 @@ void eigSelDC(const raft::handle_t &handle, math_t *in, int n_rows, int n_cols,
* @{
*/
template <typename math_t>
void eigJacobi(const raft::handle_t &handle, const math_t *in, int n_rows,
int n_cols, math_t *eig_vectors, math_t *eig_vals,
cudaStream_t stream, math_t tol = 1.e-7, int sweeps = 15) {
void eigJacobi(const raft::handle_t &handle, const math_t *in,
std::size_t n_rows, std::size_t n_cols, math_t *eig_vectors,
math_t *eig_vals, cudaStream_t stream, math_t tol = 1.e-7,
std::uint32_t sweeps = 15) {
cusolverDnHandle_t cusolverH = handle.get_cusolver_dn_handle();

syevjInfo_t syevj_params = nullptr;
CUSOLVER_CHECK(cusolverDnCreateSyevjInfo(&syevj_params));
CUSOLVER_CHECK(cusolverDnXsyevjSetTolerance(syevj_params, tol));
CUSOLVER_CHECK(cusolverDnXsyevjSetMaxSweeps(syevj_params, sweeps));
CUSOLVER_CHECK(
cusolverDnXsyevjSetMaxSweeps(syevj_params, static_cast<int>(sweeps)));

int lwork;
CUSOLVER_CHECK(cusolverDnsyevj_bufferSize(
Expand Down