diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py
index 973eee9be5..33b8e0e8a3 100644
--- a/gensim/models/phrases.py
+++ b/gensim/models/phrases.py
@@ -115,7 +115,7 @@ def _is_single(obj):
         return True, obj_iter
     else:
         # If the first item isn't a string, assume obj is a corpus
-        return False, obj_iter
+        return False, list(obj_iter)  # a list can be re-iterated; note it holds the whole corpus in memory
 
 
 class SentenceAnalyzer(object):
diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py
index f0e9cea864..d1afa991be 100644
--- a/gensim/test/test_phrases.py
+++ b/gensim/test/test_phrases.py
@@ -160,6 +160,14 @@ def setUp(self):
         self.bigram_unicode = Phrases(
             self.unicode_sentences, min_count=1, threshold=1, common_terms=self.common_terms)
 
+    def testEmptyPhrasifiedSentencesIterator(self):
+        """Test that sentences passed through a bigram and then a trigram Phraser are not empty."""
+        bigram_phrases = Phrases(self.sentences)
+        bigram_phraser = Phraser(bigram_phrases)
+        trigram_phrases = Phrases(bigram_phraser[self.sentences])
+        trigram_phraser = Phraser(trigram_phrases)
+        self.assertNotEqual(len(trigram_phraser[bigram_phraser[self.sentences]]), 0)
+
     def testEmptyInputsOnBigramConstruction(self):
         """Test that empty inputs don't throw errors and return the expected result."""
         # Empty list -> empty list