From 2d89ef1b5d7052a69f405a64cc39fdb6d7ce7341 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Wed, 23 Nov 2016 15:17:43 +0100 Subject: [PATCH 1/4] Optimised show_topics --- gensim/models/ldamodel.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index a1f469aaae..9f1c72fcca 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -800,15 +800,24 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): chosen_topics = sorted_topics[:num_topics // 2] + sorted_topics[-num_topics // 2:] shown = [] + + topic = self.state.get_lambda() for i in chosen_topics: if formatted: - topic = self.print_topic(i, topn=num_words) + topic_ = topic[i] + topic_ = topic_ / topic_.sum() # normalize to probability distribution + bestn = matutils.argsort(topic_, num_words, reverse=True) + topic_ = [(self.id2word[id], topic_[id]) for id in bestn] + topic_ = ' + '.join(['%.3f*"%s"' % (v, k) for k, v in topic_]) else: - topic = self.show_topic(i, topn=num_words) + topic_ = topic_[i] + topic_ = topic_ / topic_.sum() # normalize to probability distribution + bestn = matutils.argsort(topic_, num_words, reverse=True) + topic_ = [(self.id2word[id], topic_[id]) for id in bestn] - shown.append((i, topic)) + shown.append((i, topic_)) if log: - logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic) + logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic_) return shown From dc4c91c724077e469ad636a6202735258eebac00 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Wed, 23 Nov 2016 15:43:36 +0100 Subject: [PATCH 2/4] Fixed test --- gensim/models/ldamodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 9f1c72fcca..490fa75dae 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -810,7 +810,7 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): topic_ = [(self.id2word[id], topic_[id]) for id in bestn] topic_ = ' + '.join(['%.3f*"%s"' % (v, k) for k, v in topic_]) else: - topic_ = topic_[i] + topic_ = topic[i] topic_ = topic_ / topic_.sum() # normalize to probability distribution bestn = matutils.argsort(topic_, num_words, reverse=True) topic_ = [(self.id2word[id], topic_[id]) for id in bestn] From 18ebc6d30366f8951b5cca26c7a36c09c8d0bbec Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 24 Nov 2016 13:50:58 +0100 Subject: [PATCH 3/4] Removed code duplication --- gensim/models/ldamodel.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 490fa75dae..2bd508dfba 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -803,17 +803,12 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): topic = self.state.get_lambda() for i in chosen_topics: - if formatted: topic_ = topic[i] topic_ = topic_ / topic_.sum() # normalize to probability distribution bestn = matutils.argsort(topic_, num_words, reverse=True) topic_ = [(self.id2word[id], topic_[id]) for id in bestn] + if formatted: topic_ = ' + '.join(['%.3f*"%s"' % (v, k) for k, v in topic_]) - else: - topic_ = topic[i] - topic_ = topic_ / topic_.sum() # normalize to probability distribution - bestn = matutils.argsort(topic_, num_words, reverse=True) - topic_ = [(self.id2word[id], topic_[id]) for id in bestn] shown.append((i, topic_)) if log: From 62436ea5e08dae3e74217514b998b4a03f246904 Mon Sep 17 00:00:00 2001 From: Bhargav Srinivasa Date: Thu, 24 Nov 2016 14:02:52 +0100 Subject: [PATCH 4/4] Fixed tests --- gensim/models/ldamodel.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 2bd508dfba..97f3921d05 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -803,10 +803,10 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): topic = self.state.get_lambda() for i in chosen_topics: - topic_ = topic[i] - topic_ = topic_ / topic_.sum() # normalize to probability distribution - bestn = matutils.argsort(topic_, num_words, reverse=True) - topic_ = [(self.id2word[id], topic_[id]) for id in bestn] + topic_ = topic[i] + topic_ = topic_ / topic_.sum() # normalize to probability distribution + bestn = matutils.argsort(topic_, num_words, reverse=True) + topic_ = [(self.id2word[id], topic_[id]) for id in bestn] if formatted: topic_ = ' + '.join(['%.3f*"%s"' % (v, k) for k, v in topic_])