From 848e5ee5c94b49b987e04eb59610adeab67d50a5 Mon Sep 17 00:00:00 2001
From: samyak jain <mnsn1970@gmail.com>
Date: Mon, 8 Jan 2018 19:00:11 +0530
Subject: [PATCH] Fixes #1828: precompute document lengths in BM25

---
 gensim/summarization/bm25.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py
index 39caba1f80..ce673b970a 100644
--- a/gensim/summarization/bm25.py
+++ b/gensim/summarization/bm25.py
@@ -78,12 +78,14 @@ def __init__(self, corpus):
         self.f = []
         self.df = {}
         self.idf = {}
+        self.doc_length = []  # doc_length[i] == len(self.corpus[i]); filled by initialize()
         self.initialize()
 
     def initialize(self):
         """Calculates frequencies of terms in documents and in corpus. Also computes inverse document frequencies."""
         for document in self.corpus:
             frequencies = {}
+            self.doc_length.append(len(document))
             for word in document:
                 if word not in frequencies:
                     frequencies[word] = 0
@@ -121,9 +123,8 @@ def get_score(self, document, index, average_idf):
             if word not in self.f[index]:
                 continue
             idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf
-            doc_length = len(self.corpus[index])
             score += (idf * self.f[index][word] * (PARAM_K1 + 1)
-                      / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * doc_length / self.avgdl)))
+                      / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.doc_length[index] / self.avgdl)))
         return score
 
     def get_scores(self, document, average_idf):