diff --git a/gensim/test/test_data/tfidf_model.tst b/gensim/test/test_data/tfidf_model.tst new file mode 100644 index 0000000000..8d3c60c73e Binary files /dev/null and b/gensim/test/test_data/tfidf_model.tst differ diff --git a/gensim/test/test_data/tfidf_model.tst.bz2 b/gensim/test/test_data/tfidf_model.tst.bz2 new file mode 100644 index 0000000000..f25e0399f4 Binary files /dev/null and b/gensim/test/test_data/tfidf_model.tst.bz2 differ diff --git a/gensim/test/test_tfidfmodel.py b/gensim/test/test_tfidfmodel.py index 3864076093..79e3742d48 100644 --- a/gensim/test/test_tfidfmodel.py +++ b/gensim/test/test_tfidfmodel.py @@ -89,6 +89,17 @@ def test_persistence(self): self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]])) self.assertTrue(np.allclose(model[[]], model2[[]])) # try projecting an empty vector + # Test persistence between Gensim v3.2.0 and current model. + model3 = tfidfmodel.TfidfModel(self.corpus, smartirs="ntc") + model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst')) + idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())] + idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())] + self.assertTrue(np.allclose(idfs3, idfs4)) + tstvec = [corpus[1], corpus[2]] + self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]])) + self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]])) + self.assertTrue(np.allclose(model3[[]], model4[[]])) # try projecting an empty vector + # Test persistence with using pivoted normalization fname = get_tmpfile('gensim_models_smartirs.tst') model = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1) @@ -99,6 +110,16 @@ def test_persistence(self): self.assertTrue(np.allclose(model[tstvec[0]], model2[tstvec[0]])) self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]])) + # Test persistence between Gensim v3.2.0 and pivoted normalization model.
+ model3 = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1) + model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst')) + idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())] + idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())] + self.assertTrue(np.allclose(idfs3, idfs4)) + tstvec = [corpus[1], corpus[2]] + self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]])) + self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]])) + def test_persistence_compressed(self): # Test persistence without using `smartirs` fname = get_tmpfile('gensim_models.tst.gz') @@ -122,6 +143,17 @@ def test_persistence_compressed(self): self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]])) self.assertTrue(np.allclose(model[[]], model2[[]])) # try projecting an empty vector + # Test persistence between Gensim v3.2.0 and current compressed model. + model3 = tfidfmodel.TfidfModel(self.corpus, smartirs="ntc") + model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst.bz2')) + idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())] + idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())] + self.assertTrue(np.allclose(idfs3, idfs4)) + tstvec = [corpus[1], corpus[2]] + self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]])) + self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]])) + self.assertTrue(np.allclose(model3[[]], model4[[]])) # try projecting an empty vector + # Test persistence with using pivoted normalization fname = get_tmpfile('gensim_models_smartirs.tst.gz') model = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1) @@ -132,6 +164,16 @@ def test_persistence_compressed(self): self.assertTrue(np.allclose(model[tstvec[0]], model2[tstvec[0]])) self.assertTrue(np.allclose(model[tstvec[1]], model2[tstvec[1]])) + # Test persistence between Gensim v3.2.0 and pivoted normalization compressed model. 
+ model3 = tfidfmodel.TfidfModel(self.corpus, pivot=0, slope=1) + model4 = tfidfmodel.TfidfModel.load(datapath('tfidf_model.tst.bz2')) + idfs3 = [model3.idfs[key] for key in sorted(model3.idfs.keys())] + idfs4 = [model4.idfs[key] for key in sorted(model4.idfs.keys())] + self.assertTrue(np.allclose(idfs3, idfs4)) + tstvec = [corpus[1], corpus[2]] + self.assertTrue(np.allclose(model3[tstvec[0]], model4[tstvec[0]])) + self.assertTrue(np.allclose(model3[tstvec[1]], model4[tstvec[1]])) + def test_consistency(self): docs = [corpus[1], corpus[2]]