Skip to content

Commit

Permalink
add back smartirs tests
Browse files Browse the repository at this point in the history
  • Loading branch information
piskvorky committed Jun 21, 2018
1 parent 27b0e66 commit e2c72fa
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 0 deletions.
Binary file added gensim/test/test_data/tfidf_model.tst
Binary file not shown.
Binary file added gensim/test/test_data/tfidf_model.tst.bz2
Binary file not shown.
42 changes: 42 additions & 0 deletions gensim/test/test_tfidfmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@ def test_persistence(self):
self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]]))
self.assertTrue(np.allclose(model[[]], model2[[]])) # try projecting an empty vector

# Test persistence between Gensim v3.2.0 and current model.
model3 = tfidfmodel.TfidfModel(self.corpus, smartirs="ntc")
model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst'))
idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())]
idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())]
self.assertTrue(np.allclose(idfs3, idfs4))
tstvec = [corpus[1], corpus[2]]
self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]]))
self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]]))
self.assertTrue(np.allclose(model3[[]], model4[[]])) # try projecting an empty vector

# Test persistence when using pivoted normalization
fname = get_tmpfile('gensim_models_smartirs.tst')
model = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1)
Expand All @@ -99,6 +110,16 @@ def test_persistence(self):
self.assertTrue(np.allclose(model[tstvec[0]], model2[tstvec[0]]))
self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]]))

# Test persistence between Gensim v3.2.0 and the current pivoted normalization model.
model3 = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1)
model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst'))
idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())]
idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())]
self.assertTrue(np.allclose(idfs3, idfs4))
tstvec = [corpus[1], corpus[2]]
self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]]))
self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]]))

def test_persistence_compressed(self):
# Test persistence without using `smartirs`
fname = get_tmpfile('gensim_models.tst.gz')
Expand All @@ -122,6 +143,17 @@ def test_persistence_compressed(self):
self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]]))
self.assertTrue(np.allclose(model[[]], model2[[]])) # try projecting an empty vector

# Test persistence between Gensim v3.2.0 and current compressed model.
model3 = tfidfmodel.TfidfModel(self.corpus, smartirs="ntc")
model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst.bz2'))
idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())]
idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())]
self.assertTrue(np.allclose(idfs3, idfs4))
tstvec = [corpus[1], corpus[2]]
self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]]))
self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]]))
self.assertTrue(np.allclose(model3[[]], model4[[]])) # try projecting an empty vector

# Test persistence when using pivoted normalization
fname = get_tmpfile('gensim_models_smartirs.tst.gz')
model = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1)
Expand All @@ -132,6 +164,16 @@ def test_persistence_compressed(self):
self.assertTrue(np.allclose(model[tstvec[0]], model2[tstvec[0]]))
self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]]))

# Test persistence between Gensim v3.2.0 and pivoted normalization compressed model.
model3 = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1)
model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst.bz2'))
idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())]
idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())]
self.assertTrue(np.allclose(idfs3, idfs4))
tstvec = [corpus[1], corpus[2]]
self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]]))
self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]]))

def test_consistency(self):
docs = [corpus[1], corpus[2]]

Expand Down

0 comments on commit e2c72fa

Please sign in to comment.