Skip to content

Commit

Permalink
Warn and return raw text if only one sentence given to summarizer. Fi…
Browse files Browse the repository at this point in the history
…x for #851 (#887)
  • Loading branch information
metalaman authored and tmylk committed Sep 29, 2016
1 parent 842151d commit 2f0446f
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Changes
- bigram construction can now support multiple bigrams within one sentence
* Fixed issue #838, RuntimeWarning: overflow encountered in exp (@markroxor, [#895](https://github.com/RaRe-Technologies/gensim/pull/895))
* Changed some log messages to warnings as suggested in issue #828. (@rhnvrm, [#884](https://github.com/RaRe-Technologies/gensim/pull/884))
* Fixed issue #851: added a check in summarizer.py for a single-sentence input to avoid ZeroDivisionError, and added test cases in test/test_summarization.py (@metalaman, #887)


0.13.2, 2016-08-19
Expand Down
5 changes: 5 additions & 0 deletions gensim/summarization/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ def summarize(text, ratio=0.2, word_count=None, split=False):
logger.warning("Input text is empty.")
return

# If only one sentence is present, the function returns the input text (avoids ZeroDivisionError).
if len(sentences) == 1:
logger.warning("Summarization not performed since the document has only one sentence.")
return text

# Warns if the text is too short.
if len(sentences) < INPUT_MIN_LENGTH:
logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.")
Expand Down
11 changes: 11 additions & 0 deletions gensim/test/test_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,17 @@ def test_text_summarization_raises_exception_on_short_input_text(self):

self.assertTrue(summarize(text) is not None)

def test_text_summarization_returns_input_on_single_input_sentence(self):
# Regression test for issue #851: when given a single sentence, summarize()
# should return the input text unchanged instead of raising ZeroDivisionError.
pre_path = os.path.join(os.path.dirname(__file__), 'test_data')

with utils.smart_open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f:
text = f.read()

# Keeps the first sentence only.
# NOTE(review): this actually keeps the first *line* of the fixture; it assumes
# that line holds exactly one sentence — confirm against the test data file.
text = text.split('\n')[0]

# The summarizer is expected to hand back exactly the text it was given.
self.assertEqual(summarize(text),text)

def test_corpus_summarization_raises_exception_on_short_input_text(self):
pre_path = os.path.join(os.path.dirname(__file__), 'test_data')

Expand Down
3 changes: 3 additions & 0 deletions gensim/test/test_wikicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
import os
import sys
import types

import unittest

from gensim.corpora.wikicorpus import WikiCorpus



module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder
datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
FILENAME = 'enwiki-latest-pages-articles1.xml-p000000010p000030302-shortened.bz2'
Expand All @@ -29,6 +31,7 @@ def setUp(self):


def test_get_texts_returns_generator_of_lists(self):

if sys.version_info < (2, 7, 0):
return
wc = WikiCorpus(datapath(FILENAME))
Expand Down

0 comments on commit 2f0446f

Please sign in to comment.