Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Obey initialize_embeddings parameter in B-H tSNE #3011

Merged
merged 3 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

## Bug Fixes
- PR #2983: Fix seeding of KISS99 RNG
- PR #3011: Fix unused initialize_embeddings parameter in Barnes-Hut t-SNE
- PR #3008: Check number of columns in check_array validator
- PR #3012: Increasing learning rate for SGD log loss and invscaling pytests

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cuml/manifold/tsne.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ namespace ML {
* or >= 0 for reproducible outputs.
* @param[in] verbosity verbosity level for logging messages during
* execution
* @param[in] intialize_embeddings Whether to overwrite the current Y vector
* @param[in] initialize_embeddings Whether to overwrite the current Y vector
* with random noise.
* @param[in] barnes_hut Whether to use the fast Barnes Hut or use the
* slower exact version.
Expand All @@ -85,6 +85,6 @@ void TSNE_fit(const raft::handle_t &handle, const float *X, float *Y,
const float pre_momentum = 0.5, const float post_momentum = 0.8,
const long long random_state = -1,
int verbosity = CUML_LEVEL_INFO,
const bool intialize_embeddings = true, bool barnes_hut = true);
const bool initialize_embeddings = true, bool barnes_hut = true);

} // namespace ML
14 changes: 10 additions & 4 deletions cpp/src/tsne/barnes_hut.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ namespace TSNE {
* @param[in] pre_momentum: The momentum used during the exaggeration phase.
* @param[in] post_momentum: The momentum used after the exaggeration phase.
* @param[in] random_state: Set this to -1 for pure random initializations or >= 0 for reproducible outputs.
* @param[in] initialize_embeddings: Whether to overwrite the current Y vector with random noise.
*/
void Barnes_Hut(float *VAL, const int *COL, const int *ROW, const int NNZ,
const raft::handle_t &handle, float *Y, const int n,
Expand All @@ -56,7 +57,8 @@ void Barnes_Hut(float *VAL, const int *COL, const int *ROW, const int NNZ,
const float post_learning_rate = 500.0f,
const int max_iter = 1000, const float min_grad_norm = 1e-7,
const float pre_momentum = 0.5, const float post_momentum = 0.8,
const long long random_state = -1) {
const long long random_state = -1,
const bool initialize_embeddings = true) {
auto d_alloc = handle.get_device_allocator();
cudaStream_t stream = handle.get_stream();

Expand Down Expand Up @@ -131,9 +133,13 @@ void Barnes_Hut(float *VAL, const int *COL, const int *ROW, const int NNZ,
cudaMemsetAsync(old_forces.data(), 0, sizeof(float) * n * 2, stream));

MLCommon::device_buffer<float> YY(d_alloc, stream, (nnodes + 1) * 2);
// TODO bug #2549: this should be conditional on bool initialize_embeddings.
random_vector(YY.data(), -0.0001f, 0.0001f, (nnodes + 1) * 2, stream,
random_state);
if (initialize_embeddings) {
random_vector(YY.data(), -0.0001f, 0.0001f, (nnodes + 1) * 2, stream,
random_state);
} else {
raft::copy(YY.data(), Y, n, stream);
raft::copy(YY.data() + nnodes + 1, Y + n, n, stream);
}

// Set cache levels for faster algorithm execution
//---------------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/tsne/exact_tsne.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace TSNE {
* @param[in] pre_momentum: The momentum used during the exaggeration phase.
* @param[in] post_momentum: The momentum used after the exaggeration phase.
* @param[in] random_state: Set this to -1 for pure random initializations or >= 0 for reproducible outputs.
* @param[in] intialize_embeddings: Whether to overwrite the current Y vector with random noise.
* @param[in] initialize_embeddings: Whether to overwrite the current Y vector with random noise.
*/
void Exact_TSNE(float *VAL, const int *COL, const int *ROW, const int NNZ,
const raft::handle_t &handle, float *Y, const int n,
Expand All @@ -55,11 +55,11 @@ void Exact_TSNE(float *VAL, const int *COL, const int *ROW, const int NNZ,
const int max_iter = 1000, const float min_grad_norm = 1e-7,
const float pre_momentum = 0.5, const float post_momentum = 0.8,
const long long random_state = -1,
const bool intialize_embeddings = true) {
const bool initialize_embeddings = true) {
auto d_alloc = handle.get_device_allocator();
cudaStream_t stream = handle.get_stream();

if (intialize_embeddings)
if (initialize_embeddings)
random_vector(Y, -0.0001f, 0.0001f, n * dim, stream, random_state);

// Allocate space
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/tsne/tsne.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ void TSNE_fit(const raft::handle_t &handle, const float *X, float *Y,
const float post_learning_rate, const int max_iter,
const float min_grad_norm, const float pre_momentum,
const float post_momentum, const long long random_state,
int verbosity, const bool intialize_embeddings, bool barnes_hut) {
int verbosity, const bool initialize_embeddings,
bool barnes_hut) {
ASSERT(n > 0 && p > 0 && dim > 0 && n_neighbors > 0 && X != NULL && Y != NULL,
"Wrong input args");
ML::Logger::get().setLevel(verbosity);
Expand Down Expand Up @@ -118,12 +119,13 @@ void TSNE_fit(const raft::handle_t &handle, const float *X, float *Y,
TSNE::Barnes_Hut(VAL, COL, ROW, NNZ, handle, Y, n, theta, epssq,
early_exaggeration, exaggeration_iter, min_gain,
pre_learning_rate, post_learning_rate, max_iter,
min_grad_norm, pre_momentum, post_momentum, random_state);
min_grad_norm, pre_momentum, post_momentum, random_state,
initialize_embeddings);
} else {
TSNE::Exact_TSNE(VAL, COL, ROW, NNZ, handle, Y, n, dim, early_exaggeration,
exaggeration_iter, min_gain, pre_learning_rate,
post_learning_rate, max_iter, min_grad_norm, pre_momentum,
post_momentum, random_state, intialize_embeddings);
post_momentum, random_state, initialize_embeddings);
}
}

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/manifold/t_sne.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ cdef extern from "cuml/manifold/tsne.h" namespace "ML" nogil:
const float post_momentum,
const long long random_state,
int verbosity,
const bool intialize_embeddings,
const bool initialize_embeddings,
bool barnes_hut) except +


Expand Down