forked from piskvorky/gensim
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ce290dc
commit d147d0c
Showing
2 changed files
with
54 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2010 Radim Rehurek <radimrehurek@seznam.cz> | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html | ||
|
||
""" | ||
Automated tests for checking transformation algorithms (the models package). | ||
""" | ||
|
||
import logging | ||
import unittest | ||
|
||
from gensim.summarization.bm25 import get_bm25_weights | ||
from gensim.test.utils import common_texts | ||
|
||
|
||
class TestBM25(unittest.TestCase): | ||
|
||
def test_max_match_with_itself(self) : | ||
""" Document should show maximum matching with itself """ | ||
weights = get_bm25_weights(common_texts) | ||
for index,doc_weights in enumerate(weights) : | ||
expected = max(doc_weights) | ||
predicted = doc_weights[index] | ||
self.assertEqual(expected , predicted) | ||
|
||
def test_nonnegative_weights(self): | ||
""" All the weights for a partiular document should be non negative """ | ||
weights = get_bm25_weights(common_texts) | ||
self.assertTrue(min(weights)>=0) | ||
|
||
def test_same_match_with_same_document(self): | ||
""" A document should always get the same weight when matched with a particular document """ | ||
corpus = [['cat','dog','mouse'] , ['cat','lion'] , ['cat','lion']] | ||
weights = get_bm25_weights(corpus) | ||
self.assertEqual(weights[0][1],weights[0][2]) | ||
|
||
def test_disjoint_docs_if_weight_zero(self): | ||
""" Two disjoint documents should have zero matching""" | ||
corpus = [['cat','dog','lion'] , ['goat','fish','tiger']] | ||
weights = get_bm25_weights(corpus) | ||
self.assertTrue(weights[0][1]==0) | ||
self.assertTrue(weights[1][0]==0) | ||
|
||
if __name__ == '__main__': | ||
logging.basicConfig(level=logging.DEBUG) | ||
unittest.main() | ||
|
||
|