From 35bad8bbeb4ec05a3b6204a5a5c5f417464a0932 Mon Sep 17 00:00:00 2001 From: Lev Konstantinovskiy Date: Thu, 4 Aug 2016 23:46:19 +0200 Subject: [PATCH] Bugfix: Full2sparse clipped to use abs value (#811) * Fix full2sparse_clipped to return the `topn` elements of the greatest magnitude (abs). Previously it was returning the greatest elements (without abs). * ab --- gensim/matutils.py | 2 +- gensim/test/test_similarities.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/gensim/matutils.py b/gensim/matutils.py index 6045c28b14..bd7c92c688 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -238,7 +238,7 @@ def full2sparse_clipped(vec, topn, eps=1e-9): return [] vec = numpy.asarray(vec, dtype=float) nnz = numpy.nonzero(abs(vec) > eps)[0] - biggest = nnz.take(argsort(vec.take(nnz), topn, reverse=True)) + biggest = nnz.take(argsort(abs(vec).take(nnz), topn, reverse=True)) return list(zip(biggest, vec.take(biggest))) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 6d822b7141..b492b4a9b4 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -101,6 +101,13 @@ def testNumBest(self): for num_best in [None, 0, 1, 9, 1000]: self.testFull(num_best=num_best) + def test_full2sparse_clipped(self): + + vec = [0.8, 0.2, 0.0, 0.0, -0.1, -0.15] + expected = [(0, 0.80000000000000004), (1, 0.20000000000000001), (5, -0.14999999999999999)] + self.assertTrue(matutils.full2sparse_clipped(vec, topn=3), expected) + + def testChunking(self): if self.cls == similarities.Similarity: