From d9b098af764f2a333eabe53e27d881d5ebb1e14a Mon Sep 17 00:00:00 2001 From: samyak jain Date: Wed, 24 Jan 2018 04:03:17 +0530 Subject: [PATCH] Fixes #1401, Phrases behaviour now consistent on different versions, test added for empty iterator --- gensim/models/phrases.py | 2 +- gensim/test/test_phrases.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index 973eee9be5..33b8e0e8a3 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -115,7 +115,7 @@ def _is_single(obj): return True, obj_iter else: # If the first item isn't a string, assume obj is a corpus - return False, obj_iter + return False, list(obj_iter) class SentenceAnalyzer(object): diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py index f0e9cea864..d1afa991be 100644 --- a/gensim/test/test_phrases.py +++ b/gensim/test/test_phrases.py @@ -160,6 +160,13 @@ def setUp(self): self.bigram_unicode = Phrases( self.unicode_sentences, min_count=1, threshold=1, common_terms=self.common_terms) + def testEmptyPhrasifiedSentencesIterator(self): + bigram_phrases = Phrases(self.sentences) + bigram_phraser = Phraser(bigram_phrases) + trigram_phrases = Phrases(bigram_phraser[self.sentences]) + trigram_phraser = Phraser(trigram_phrases) + self.assertNotEqual(trigram_phraser[bigram_phraser[self.sentences]].__len__(), 0) + def testEmptyInputsOnBigramConstruction(self): """Test that empty inputs don't throw errors and return the expected result.""" # Empty list -> empty list