From f9803cba2897bbd62594ca100cebefdaa006c719 Mon Sep 17 00:00:00 2001 From: samyak jain Date: Mon, 8 Jan 2018 13:26:19 +0530 Subject: [PATCH 1/2] bm25 scoring function updated --- gensim/summarization/bm25.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index ec484949cf..0c37e4f6a3 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ -122,7 +122,7 @@ def get_score(self, document, index, average_idf): continue idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf score += (idf * self.f[index][word] * (PARAM_K1 + 1) - / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * len(document) / self.avgdl))) + / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * len(corpus[index]) / self.avgdl))) return score def get_scores(self, document, average_idf): From 063010189728729533010e1171d3df33f2c3aee6 Mon Sep 17 00:00:00 2001 From: samyak jain Date: Mon, 8 Jan 2018 16:04:49 +0530 Subject: [PATCH 2/2] Fixes #1828 --- gensim/summarization/bm25.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index 0c37e4f6a3..c940c08edd 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ -122,7 +122,7 @@ def get_score(self, document, index, average_idf): continue idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf score += (idf * self.f[index][word] * (PARAM_K1 + 1) - / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * len(corpus[index]) / self.avgdl))) + / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * len(self.corpus[index]) / self.avgdl))) return score def get_scores(self, document, average_idf):