diff --git a/openTSNE/quad_tree.pyx b/openTSNE/quad_tree.pyx
index 16720aae..08bae6a0 100644
--- a/openTSNE/quad_tree.pyx
+++ b/openTSNE/quad_tree.pyx
@@ -124,7 +124,7 @@ cdef void split_node(Node * node):
     PyMem_Free(new_center)
 
 
-cdef inline bint is_duplicate(Node * node, double * point, double duplicate_eps=1e-6) nogil:
+cdef inline bint is_duplicate(Node * node, double * point, double duplicate_eps=1e-16) nogil:
     cdef Py_ssize_t d
     for d in range(node.n_dims):
         if fabs(node.center_of_mass[d] - point[d]) >= duplicate_eps:
diff --git a/tests/test_correctness.py b/tests/test_correctness.py
index 1be5ad4c..5e1f1759 100644
--- a/tests/test_correctness.py
+++ b/tests/test_correctness.py
@@ -318,3 +318,18 @@ def test_spectral_agreement_with_sklearn(self):
         np.testing.assert_almost_equal(
             np.abs(np.corrcoef(embedding1[:,1], embedding2[:,1])[0,1]), 1
         )
+
+
+class TestEarlyExaggerationCollapse(unittest.TestCase):
+    """Regression test: in some cases, the BH implementation was producing a
+    collapsed embedding for all data points. For more information, see #233, #234."""
+    def test_early_exaggeration_does_not_collapse(self):
+        n_samples = [100, 150, 200]
+        n_dims = [5, 10, 20]
+
+        np.random.seed(42)  # fixed seed: the random inputs below are reproducible
+        for n in n_samples:
+            for d in n_dims:
+                x = np.random.randn(n, d)
+                embedding = openTSNE.TSNE(random_state=42).fit(x)
+                self.assertGreater(np.max(np.abs(embedding)), 1e-8)  # fails if all coords ~0