Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose simplicial set functions #4711

Merged
merged 14 commits into from
May 24, 2022
12 changes: 9 additions & 3 deletions cpp/bench/sg/umap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class UmapSupervised : public UmapBase {
protected:
void coreBenchmarkMethod()
{
auto graph = raft::sparse::COO<float, int>(stream);
UMAP::fit(*this->handle,
this->data.X.data(),
yFloat,
Expand All @@ -123,7 +124,8 @@ class UmapSupervised : public UmapBase {
nullptr,
nullptr,
&uParams,
embeddings);
embeddings,
&graph);
}
};
ML_BENCH_REGISTER(Params, UmapSupervised, "blobs", getInputs());
Expand All @@ -135,6 +137,7 @@ class UmapUnsupervised : public UmapBase {
protected:
void coreBenchmarkMethod()
{
auto graph = raft::sparse::COO<float, int>(stream);
UMAP::fit(*this->handle,
this->data.X.data(),
nullptr,
Expand All @@ -143,7 +146,8 @@ class UmapUnsupervised : public UmapBase {
nullptr,
nullptr,
&uParams,
embeddings);
embeddings,
&graph);
}
};
ML_BENCH_REGISTER(Params, UmapUnsupervised, "blobs", getInputs());
Expand Down Expand Up @@ -173,6 +177,7 @@ class UmapTransform : public UmapBase {
UmapBase::allocateBuffers(state);
auto& handle = *this->handle;
alloc(transformed, this->params.nrows * uParams.n_components);
auto graph = raft::sparse::COO<float, int>(stream);
UMAP::fit(handle,
this->data.X.data(),
yFloat,
Expand All @@ -181,7 +186,8 @@ class UmapTransform : public UmapBase {
nullptr,
nullptr,
&uParams,
embeddings);
embeddings,
&graph);
}
void deallocateBuffers(const ::benchmark::State& state)
{
Expand Down
204 changes: 141 additions & 63 deletions cpp/include/cuml/manifold/umap.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,126 @@ namespace ML {
class UMAPParams;
namespace UMAP {

/**
* Computes the a and b parameters and stores them in the given ML::UMAPParams object.
*
* @param[in] handle: raft::handle_t
* @param[out] params: pointer to ML::UMAPParams object of which the a and b parameters will be
* updated
*/
void find_ab(const raft::handle_t& handle, UMAPParams* params);

/**
* Returns the simplicial set to be consumed by the ML::UMAP::refine function.
*
* @param[in] handle: raft::handle_t
* @param[in] X: pointer to input array
* @param[in] y: pointer to labels array
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] knn_indices: pointer to knn_indices (optional)
* @param[in] knn_dists: pointer to knn_dists (optional)
* @param[in] params: pointer to ML::UMAPParams object
* @return: simplicial set as a unique pointer to a raft::sparse::COO object
*/
std::unique_ptr<raft::sparse::COO<float, int>> get_graph(const raft::handle_t& handle,
cjnolet marked this conversation as resolved.
Show resolved Hide resolved
float* X, // input matrix
float* y, // labels
int n,
int d,
int64_t* knn_indices,
float* knn_dists,
UMAPParams* params);

/**
* Performs a UMAP fit on existing embeddings without reinitializing them, which enables
* iterative fitting without callbacks.
*
* @param[in] handle: raft::handle_t
* @param[in] X: pointer to input array
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] graph: pointer to raft::sparse::COO object computed using ML::UMAP::get_graph
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] embeddings: pointer to current embedding with shape n * n_components, stores updated
* embeddings on executing refine
*/
void refine(const raft::handle_t& handle,
float* X,
int n,
int d,
raft::sparse::COO<float, int>* graph,
UMAPParams* params,
float* embeddings);

/**
* Dense fit
*
* @param[in] handle: raft::handle_t
* @param[in] X: pointer to input array
* @param[in] y: pointer to labels array
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] knn_indices: pointer to knn_indices of input (optional)
* @param[in] knn_dists: pointer to knn_dists of input (optional)
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] embeddings: pointer to embedding produced through projection
* @param[out] graph: pointer to fuzzy simplicial set graph
*/
void fit(const raft::handle_t& handle,
float* X,
float* y,
int n,
int d,
int64_t* knn_indices,
float* knn_dists,
UMAPParams* params,
float* embeddings,
raft::sparse::COO<float, int>* graph);

/**
* Sparse fit
*
* @param[in] handle: raft::handle_t
* @param[in] indptr: pointer to index pointer array of input array
* @param[in] indices: pointer to index array of input array
* @param[in] data: pointer to data array of input array
* @param[in] nnz: number of stored values of input array
* @param[in] y: pointer to labels array
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] embeddings: pointer to embedding produced through projection
* @param[out] graph: pointer to fuzzy simplicial set graph
*/
void fit_sparse(const raft::handle_t& handle,
int* indptr,
int* indices,
float* data,
size_t nnz,
float* y,
int n,
int d,
UMAPParams* params,
float* embeddings,
raft::sparse::COO<float, int>* graph);

/**
* Dense transform
*
* @param[in] handle: raft::handle_t
* @param[in] X: pointer to input array to be inferred
* @param[in] n: n_samples of input array to be inferred
* @param[in] d: n_features of input array to be inferred
* @param[in] knn_indices: pointer to knn_indices of input (optional)
* @param[in] knn_dists: pointer to knn_dists of input (optional)
* @param[in] orig_X: pointer to original training array
* @param[in] orig_n: number of rows in original training array
* @param[in] embedding: pointer to embedding created during training
* @param[in] embedding_n: number of rows in embedding created during training
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] transformed: pointer to embedding produced through projection
*/
void transform(const raft::handle_t& handle,
float* X,
int n,
Expand All @@ -43,6 +163,26 @@ void transform(const raft::handle_t& handle,
UMAPParams* params,
float* transformed);

/**
* Sparse transform
*
* @param[in] handle: raft::handle_t
* @param[in] indptr: pointer to index pointer array of input array to be inferred
* @param[in] indices: pointer to index array of input array to be inferred
* @param[in] data: pointer to data array of input array to be inferred
* @param[in] nnz: number of stored values of input array to be inferred
* @param[in] n: n_samples of input array
* @param[in] d: n_features of input array
* @param[in] orig_x_indptr: pointer to index pointer array of original training array
* @param[in] orig_x_indices: pointer to index array of original training array
* @param[in] orig_x_data: pointer to data array of original training array
* @param[in] orig_nnz: number of stored values of original training array
* @param[in] orig_n: number of rows in original training array
* @param[in] embedding: pointer to embedding created during training
* @param[in] embedding_n: number of rows in embedding created during training
* @param[in] params: pointer to ML::UMAPParams object
* @param[out] transformed: pointer to embedding produced through projection
*/
void transform_sparse(const raft::handle_t& handle,
int* indptr,
int* indices,
Expand All @@ -60,67 +200,5 @@ void transform_sparse(const raft::handle_t& handle,
UMAPParams* params,
float* transformed);

void find_ab(const raft::handle_t& handle, UMAPParams* params);

void fit(const raft::handle_t& handle,
float* X, // input matrix
float* y, // labels
int n,
int d,
int64_t* knn_indices,
float* knn_dists,
UMAPParams* params,
float* embeddings);

/**
* refine performs a UMAP fit on existing embeddings without reinitializing them, which enables
* iterative fitting without callbacks.
*
* @param handle: raft::handle_t
* @param X: pointer to input array
* @param n: n_samples of input array
* @param d: n_features of input array
* @param cgraph_coo: pointer to raft::sparse::COO object computed using ML::UMAP::get_graph
* @param params: pointer to ML::UMAPParams object
* @param embeddings: pointer to current embedding with shape n * n_components, stores updated
* embeddings on executing refine
*/
void refine(const raft::handle_t& handle,
float* X, // input matrix
int n,
int d,
raft::sparse::COO<float, int>* cgraph_coo,
UMAPParams* params,
float* embeddings);

/**
* returns a simplical set as a raft::sparse:COO object to be consumed by the ML::UMAP::refine
* function.
*
* @param handle: raft::handle_t
* @param X: pointer to input array
* @param y: pointer to labels array
* @param n: n_samples of input array
* @param d: n_features of input array
* @param params: pointer to ML::UMAPParams object
* @return: simplical set (pointer to raft::sparse::COO object)
*/
std::unique_ptr<raft::sparse::COO<float, int>> get_graph(const raft::handle_t& handle,
float* X, // input matrix
float* y, // labels
int n,
int d,
UMAPParams* params);

void fit_sparse(const raft::handle_t& handle,
int* indptr, // input matrix
int* indices,
float* data,
size_t nnz,
float* y,
int n, // rows
int d, // cols
UMAPParams* params,
float* embeddings);
} // namespace UMAP
} // namespace ML
Loading