piskvorky · tmylk · Nov 13, 2016 · Oct 31, 2016 · Nov 1, 2016 · Nov 1, 2016
diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py
@@ -780,13 +780,9 @@ def __str__(self):
 
     def finished_training(self):
         """
-        Discard parametrs that are used in training and score. Use if you're sure you're done training a model,
+        Discard parameters that are used in training and score. Use if you're sure you're done training a model.
         """
-        self.training_finished = True
-        if hasattr(self, 'syn1') and not self.hs:
-            del self.syn1
-        if hasattr(self, 'syn1neg') and not self.negative:
-            del self.syn1neg
+        self._minimize_model(self.hs, self.negative > 0, True)
         if hasattr(self, 'doctag_syn0'):
             del self.doctag_syn0
         if hasattr(self, 'doctag_syn0_lockf'):

diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py
@@ -1750,16 +1750,29 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c
     def __str__(self):
         return "%s(vocab=%s, size=%s, alpha=%s)" % (self.__class__.__name__, len(self.index2word), self.vector_size, self.alpha)
 
+    def _minimize_model(self, save_syn1=False, save_syn1neg=False, save_syn0_lockf=False):
+        """
+        Delete the `syn1`, `syn1neg` and `syn0_lockf` arrays from the model, where present.
+
+        Each array is kept only if the matching `save_*` argument is true.
+        """
+        if hasattr(self, 'syn1') and not save_syn1:
+            del self.syn1
+        if hasattr(self, 'syn1neg') and not save_syn1neg:
+            del self.syn1neg
+        if hasattr(self, 'syn0_lockf') and not save_syn0_lockf:
+            del self.syn0_lockf
+
     def finished_training(self):
         """
-        Discard parametrs that are used in training and score. Use if you're sure you're done training a model,
+        Discard parameters that are used in training and score. Use if you're sure you're done training a model.
         """
         self.training_finished = True
-        self.init_sims(replace = True)
-        if hasattr(self, 'syn1neg'):
-            del self.syn1neg
-        if hasattr(self, 'syn0_lockf'):
-            del self.syn0_lockf
+        # L2-normalize every row of syn0 in place; syn0norm then aliases the same array
+        for i in xrange(self.syn0.shape[0]):
+            self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1))
+        self.syn0norm = self.syn0
+        self._minimize_model()
 
     def save(self, *args, **kwargs):
         # don't bother storing the cached normalized vectors, recalculable table

diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py
@@ -284,9 +284,19 @@ def test_finished_training(self):
         """Test doc2vec model after finishing training"""
         for i in [0, 1]:
             for j in [0, 1]:
-                model = doc2vec.Doc2Vec(sentences, size=5, min_count=1, negative=i, hs=j)
+                model = doc2vec.Doc2Vec(sentences, size=5, min_count=1, hs=i, negative=j)
                 model.finished_training()
-                self.assertTrue(len(model.infer_vector(['graph'])), 5)
+                self.assertEqual(len(model['human']), 5)
+                self.assertEqual(model.vocab['graph'].count, 3)
+                if i == 1:
+                    self.assertTrue(hasattr(model, 'syn1'))
+                else:
+                    self.assertFalse(hasattr(model, 'syn1'))
+                if j == 1:
+                    self.assertTrue(hasattr(model, 'syn1neg'))
+                else:
+                    self.assertFalse(hasattr(model, 'syn1neg'))
+                self.assertTrue(hasattr(model, 'syn0_lockf'))
 
     @log_capture()
     def testBuildVocabWarning(self, l):

diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py
@@ -488,8 +488,12 @@ def testFinishedTraining(self):
             for j in [0, 1]:
                 model = word2vec.Word2Vec(sentences, size=10, min_count=0, seed=42, hs=i, negative=j)
                 model.finished_training()
-                self.assertTrue(len(model.vocab), 12)
-        self.assertTrue(model.vocab['graph'].count, 3)
+                self.assertEqual(len(model['human']), 10)
+                self.assertEqual(len(model.vocab), 12)
+                self.assertEqual(model.vocab['graph'].count, 3)
+                self.assertFalse(hasattr(model, 'syn1'))
+                self.assertFalse(hasattr(model, 'syn1neg'))
+                self.assertFalse(hasattr(model, 'syn0_lockf'))
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.save_word2vec_format(testfile(), binary=True)
         norm_only_model = word2vec.Word2Vec.load_word2vec_format(testfile(), binary=True)