diff --git a/gensim/matutils.py b/gensim/matutils.py
index 85118f892c..38e2431caf 100644
--- a/gensim/matutils.py
+++ b/gensim/matutils.py
@@ -28,7 +28,16 @@
 from six.moves import xrange, zip as izip
 
 
-blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
+def blas(name, ndarray):
+    """Return the BLAS routine `name` suited to `ndarray`.
+
+    Thin wrapper around `scipy.linalg.get_blas_funcs` for a single routine name;
+    `ndarray` is handed to scipy so it can pick the variant matching the array.
+
+    """
+    routines = scipy.linalg.get_blas_funcs((name,), (ndarray,))
+    return routines[0]
+
 
 logger = logging.getLogger(__name__)
 
diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py
index 5e93c6f8cf..20c9f8518c 100755
--- a/gensim/similarities/docsim.py
+++ b/gensim/similarities/docsim.py
@@ -336,18 +336,23 @@ def __getitem__(self, query):
             # the following uses a lot of lazy evaluation and (optionally) parallel
             # processing, to improve query latency and minimize memory footprint.
             offsets = numpy.cumsum([0] + [len(shard) for shard in self.shards])
-            convert = lambda doc, shard_no: [(doc_index + offsets[shard_no], sim) for doc_index, sim in doc]
+
+            def convert(shard_no, doc):
+                """Shift shard-local document indexes in `doc` by the start offset of shard `shard_no`."""
+                shift = offsets[shard_no]
+                return [(doc_index + shift, sim) for doc_index, sim in doc]
+
             is_corpus, query = utils.is_corpus(query)
             is_corpus = is_corpus or hasattr(query, 'ndim') and query.ndim > 1 and query.shape[0] > 1
             if not is_corpus:
                 # user asked for num_best most similar and query is a single doc
-                results = (convert(result, shard_no) for shard_no, result in enumerate(shard_results))
+                results = (convert(shard_no, result) for shard_no, result in enumerate(shard_results))
                 result = heapq.nlargest(self.num_best, itertools.chain(*results), key=lambda item: item[1])
             else:
                 # the trickiest combination: returning num_best results when query was a corpus
                 results = []
                 for shard_no, result in enumerate(shard_results):
-                    shard_result = [convert(doc, shard_no) for doc in result]
+                    shard_result = [convert(shard_no, doc) for doc in result]
                     results.append(shard_result)
                 result = []
                 for parts in izip(*results):
diff --git a/gensim/sklearn_api/atmodel.py b/gensim/sklearn_api/atmodel.py
index d3128243a6..8845bdd816 100644
--- a/gensim/sklearn_api/atmodel.py
+++ b/gensim/sklearn_api/atmodel.py
@@ -76,17 +76,11 @@ def transform(self, author_names):
                 "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method."
             )
 
-        check = lambda x: [x] if not isinstance(x, list) else x
-        author_names = check(author_names)
-        X = [[] for _ in range(0, len(author_names))]
-
-        for k, v in enumerate(author_names):
-            transformed_author = self.gensim_model[v]
-            # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future
-            probs_author = matutils.sparse2full(transformed_author, self.num_topics)
-            X[k] = probs_author
-
-        return np.reshape(np.array(X), (len(author_names), self.num_topics))
+        if not isinstance(author_names, list):
+            author_names = [author_names]
+        # Returns a dense representation for compatibility with sklearn; TODO: go back to a sparse representation.
+        topics = [matutils.sparse2full(self.gensim_model[author_name], self.num_topics) for author_name in author_names]
+        return np.reshape(np.array(topics), (len(author_names), self.num_topics))
 
     def partial_fit(self, X, author2doc=None, doc2author=None):
         """
diff --git a/gensim/sklearn_api/d2vmodel.py b/gensim/sklearn_api/d2vmodel.py
index 14163f1600..245231ad45 100644
--- a/gensim/sklearn_api/d2vmodel.py
+++ b/gensim/sklearn_api/d2vmodel.py
@@ -87,12 +87,7 @@ def transform(self, docs):
             )
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x[0], string_types) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        for k, v in enumerate(docs):
-            doc_vec = self.gensim_model.infer_vector(v)
-            X[k] = doc_vec
-
-        return np.reshape(np.array(X), (len(docs), self.gensim_model.vector_size))
+        if isinstance(docs[0], string_types):
+            docs = [docs]
+        vectors = [self.gensim_model.infer_vector(doc) for doc in docs]
+        return np.reshape(np.array(vectors), (len(docs), self.gensim_model.vector_size))
diff --git a/gensim/sklearn_api/hdp.py b/gensim/sklearn_api/hdp.py
index d1dcec01a5..acadd6f459 100644
--- a/gensim/sklearn_api/hdp.py
+++ b/gensim/sklearn_api/hdp.py
@@ -77,21 +77,19 @@ def transform(self, docs):
             )
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x[0], tuple) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        max_num_topics = 0
-        for k, v in enumerate(docs):
-            X[k] = self.gensim_model[v]
-            max_num_topics = max(max_num_topics, max(x[0] for x in X[k]) + 1)
-
-        for k, v in enumerate(X):
-            # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future
-            dense_vec = matutils.sparse2full(v, max_num_topics)
-            X[k] = dense_vec
-
-        return np.reshape(np.array(X), (len(docs), max_num_topics))
+        if isinstance(docs[0], tuple):
+            docs = [docs]
+        distribution, max_num_topics = [], 0
+
+        for doc in docs:
+            topicd = self.gensim_model[doc]
+            distribution.append(topicd)
+            # fold the running maximum into one `max()` call: `max()` over an empty topic list raises ValueError
+            max_num_topics = max([max_num_topics] + [topic[0] + 1 for topic in topicd])
+
+        # Returns a dense representation for compatibility with sklearn; TODO: go back to a sparse representation.
+        distribution = [matutils.sparse2full(t, max_num_topics) for t in distribution]
+        return np.reshape(np.array(distribution), (len(docs), max_num_topics))
 
     def partial_fit(self, X):
         """
diff --git a/gensim/sklearn_api/ldamodel.py b/gensim/sklearn_api/ldamodel.py
index 77d539e616..40d7c52db0 100644
--- a/gensim/sklearn_api/ldamodel.py
+++ b/gensim/sklearn_api/ldamodel.py
@@ -83,16 +83,11 @@ def transform(self, docs):
             raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.")
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x[0], tuple) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        for k, v in enumerate(docs):
-            doc_topics = self.gensim_model[v]
-            # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future
-            probs_docs = matutils.sparse2full(doc_topics, self.num_topics)
-            X[k] = probs_docs
-        return np.reshape(np.array(X), (len(docs), self.num_topics))
+        if isinstance(docs[0], tuple):
+            docs = [docs]
+        # Returns a dense representation for compatibility with sklearn; TODO: go back to a sparse representation.
+        distribution = [matutils.sparse2full(self.gensim_model[doc], self.num_topics) for doc in docs]
+        return np.reshape(np.array(distribution), (len(docs), self.num_topics))
 
     def partial_fit(self, X):
         """
diff --git a/gensim/sklearn_api/ldaseqmodel.py b/gensim/sklearn_api/ldaseqmodel.py
index 6b96d8d6fa..2c5d0879d4 100644
--- a/gensim/sklearn_api/ldaseqmodel.py
+++ b/gensim/sklearn_api/ldaseqmodel.py
@@ -69,12 +69,7 @@ def transform(self, docs):
             raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.")
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x[0], tuple) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        for k, v in enumerate(docs):
-            transformed_author = self.gensim_model[v]
-            X[k] = transformed_author
-
-        return np.reshape(np.array(X), (len(docs), self.num_topics))
+        if isinstance(docs[0], tuple):
+            docs = [docs]
+        proportions = [self.gensim_model[doc] for doc in docs]
+        return np.reshape(np.array(proportions), (len(docs), self.num_topics))
diff --git a/gensim/sklearn_api/lsimodel.py b/gensim/sklearn_api/lsimodel.py
index 776af6f5da..30263b02af 100644
--- a/gensim/sklearn_api/lsimodel.py
+++ b/gensim/sklearn_api/lsimodel.py
@@ -67,15 +67,11 @@ def transform(self, docs):
             )
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x[0], tuple) else x
-        docs = check(docs)
-        X = [[] for i in range(0, len(docs))]
-        for k, v in enumerate(docs):
-            doc_topics = self.gensim_model[v]
-            # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future
-            probs_docs = matutils.sparse2full(doc_topics, self.num_topics)
-            X[k] = probs_docs
-        return np.reshape(np.array(X), (len(docs), self.num_topics))
+        if isinstance(docs[0], tuple):
+            docs = [docs]
+        # Returns a dense representation for compatibility with sklearn; TODO: go back to a sparse representation.
+        distribution = [matutils.sparse2full(self.gensim_model[doc], self.num_topics) for doc in docs]
+        return np.reshape(np.array(distribution), (len(docs), self.num_topics))
 
     def partial_fit(self, X):
         """
diff --git a/gensim/sklearn_api/phrases.py b/gensim/sklearn_api/phrases.py
index 2eab84b95e..fcd7d4c5f1 100644
--- a/gensim/sklearn_api/phrases.py
+++ b/gensim/sklearn_api/phrases.py
@@ -50,15 +50,9 @@ def transform(self, docs):
             raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.")
 
         # input as python lists
-        check = lambda x: [x] if isinstance(x[0], string_types) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        for k, v in enumerate(docs):
-            phrase_tokens = self.gensim_model[v]
-            X[k] = phrase_tokens
-
-        return X
+        if isinstance(docs[0], string_types):
+            docs = [docs]
+        return [self.gensim_model[doc] for doc in docs]
 
     def partial_fit(self, X):
         if self.gensim_model is None:
diff --git a/gensim/sklearn_api/rpmodel.py b/gensim/sklearn_api/rpmodel.py
index 62395e0bce..59d4c87a45 100644
--- a/gensim/sklearn_api/rpmodel.py
+++ b/gensim/sklearn_api/rpmodel.py
@@ -52,14 +52,8 @@ def transform(self, docs):
             )
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x[0], tuple) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        for k, v in enumerate(docs):
-            transformed_doc = self.gensim_model[v]
-            # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future
-            probs_docs = matutils.sparse2full(transformed_doc, self.num_topics)
-            X[k] = probs_docs
-
-        return np.reshape(np.array(X), (len(docs), self.num_topics))
+        if isinstance(docs[0], tuple):
+            docs = [docs]
+        # Returns a dense representation for compatibility with sklearn; TODO: go back to a sparse representation.
+        presentation = [matutils.sparse2full(self.gensim_model[doc], self.num_topics) for doc in docs]
+        return np.reshape(np.array(presentation), (len(docs), self.num_topics))
diff --git a/gensim/sklearn_api/text2bow.py b/gensim/sklearn_api/text2bow.py
index 6beb126d0d..e71a954c32 100644
--- a/gensim/sklearn_api/text2bow.py
+++ b/gensim/sklearn_api/text2bow.py
@@ -48,16 +48,10 @@ def transform(self, docs):
             )
 
         # input as python lists
-        check = lambda x: [x] if isinstance(x, string_types) else x
-        docs = check(docs)
-        tokenized_docs = [list(self.tokenizer(x)) for x in docs]
-        X = [[] for _ in range(0, len(tokenized_docs))]
-
-        for k, v in enumerate(tokenized_docs):
-            bow_val = self.gensim_model.doc2bow(v)
-            X[k] = bow_val
-
-        return X
+        if isinstance(docs, string_types):
+            docs = [docs]
+        tokenized_docs = (list(self.tokenizer(doc)) for doc in docs)
+        return [self.gensim_model.doc2bow(doc) for doc in tokenized_docs]
 
     def partial_fit(self, X):
         if self.gensim_model is None:
diff --git a/gensim/sklearn_api/tfidf.py b/gensim/sklearn_api/tfidf.py
index 414c597dc1..7952d11e75 100644
--- a/gensim/sklearn_api/tfidf.py
+++ b/gensim/sklearn_api/tfidf.py
@@ -51,12 +51,6 @@ def transform(self, docs):
             )
 
         # input as python lists
-        check = lambda x: [x] if isinstance(x[0], tuple) else x
-        docs = check(docs)
-        X = [[] for _ in range(0, len(docs))]
-
-        for k, v in enumerate(docs):
-            transformed_doc = self.gensim_model[v]
-            X[k] = transformed_doc
-
-        return X
+        if isinstance(docs[0], tuple):
+            docs = [docs]
+        return [self.gensim_model[doc] for doc in docs]
diff --git a/gensim/sklearn_api/w2vmodel.py b/gensim/sklearn_api/w2vmodel.py
index 6ddea2eb90..317842ee07 100644
--- a/gensim/sklearn_api/w2vmodel.py
+++ b/gensim/sklearn_api/w2vmodel.py
@@ -75,15 +75,10 @@ def transform(self, words):
             )
 
         # The input as array of array
-        check = lambda x: [x] if isinstance(x, six.string_types) else x
-        words = check(words)
-        X = [[] for _ in range(0, len(words))]
-
-        for k, v in enumerate(words):
-            word_vec = self.gensim_model[v]
-            X[k] = word_vec
-
-        return np.reshape(np.array(X), (len(words), self.size))
+        if isinstance(words, six.string_types):
+            words = [words]
+        vectors = [self.gensim_model[word] for word in words]
+        return np.reshape(np.array(vectors), (len(words), self.size))
 
     def partial_fit(self, X):
         raise NotImplementedError(
diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py
index 97bffde623..5947280f59 100644
--- a/gensim/test/test_atmodel.py
+++ b/gensim/test/test_atmodel.py
@@ -14,9 +14,6 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
 import numbers
 from os import remove
 
@@ -27,6 +24,8 @@
 from gensim.models import atmodel
 from gensim import matutils
 from gensim.test import basetmtests
+from gensim.test.utils import (datapath,
+    get_tmpfile, common_texts, common_dictionary as dictionary, common_corpus as corpus)
 
 # TODO:
 # Test that computing the bound on new unseen documents works as expected (this is somewhat different
@@ -36,23 +35,6 @@
 # increases the bound.
 # Test that models are compatiple across versions, as done in LdaModel.
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
 
 # Assign some authors randomly to the documents above.
 author2doc = {
@@ -61,6 +43,7 @@
     'jack': [0, 2, 4, 6, 8],
     'jill': [1, 3, 5, 7]
 }
+
 doc2author = {
     0: ['john', 'jack'],
     1: ['john', 'jill'],
@@ -76,18 +59,12 @@
 # More data with new and old authors (to test update method).
 # Although the text is just a subset of the previous, the model
 # just sees it as completely new data.
-texts_new = texts[0:3]
+texts_new = common_texts[0:3]
 author2doc_new = {'jill': [0], 'bob': [0, 1], 'sally': [1, 2]}
 dictionary_new = Dictionary(texts_new)
 corpus_new = [dictionary_new.doc2bow(text) for text in texts_new]
 
 
-def testfile(test_fname=''):
-    # temporary data will be stored to this file
-    fname = 'gensim_models_' + test_fname + '.tst'
-    return os.path.join(tempfile.gettempdir(), fname)
-
-
 class TestAuthorTopicModel(unittest.TestCase, basetmtests.TestBaseTopicModel):
     def setUp(self):
         self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
@@ -475,30 +452,32 @@ def testPasses(self):
         # long message includes the original error message with a custom one
         self.longMessage = True
         # construct what we expect when passes aren't involved
-        test_rhots = list()
+        test_rhots = []
         model = self.class_(id2word=dictionary, chunksize=1, num_topics=2)
-        final_rhot = lambda: pow(model.offset + (1 * model.num_updates) / model.chunksize, -model.decay)
+
+        def final_rhot(model):
+            return pow(model.offset + (1 * model.num_updates) / model.chunksize, -model.decay)
 
         # generate 5 updates to test rhot on
-        for x in range(5):
+        for _ in range(5):
             model.update(corpus, author2doc)
-            test_rhots.append(final_rhot())
+            test_rhots.append(final_rhot(model))
 
         for passes in [1, 5, 10, 50, 100]:
             model = self.class_(id2word=dictionary, chunksize=1, num_topics=2, passes=passes)
-            self.assertEqual(final_rhot(), 1.0)
+            self.assertEqual(final_rhot(model), 1.0)
             # make sure the rhot matches the test after each update
             for test_rhot in test_rhots:
                 model.update(corpus, author2doc)
 
                 msg = "{}, {}, {}".format(passes, model.num_updates, model.state.numdocs)
-                self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg)
+                self.assertAlmostEqual(final_rhot(model), test_rhot, msg=msg)
 
             self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots))
             self.assertEqual(model.num_updates, len(corpus) * len(test_rhots))
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_atmodel.tst')
         model = self.model
         model.save(fname)
         model2 = self.class_.load(fname)
@@ -507,7 +486,7 @@ def testPersistence(self):
         self.assertTrue(np.allclose(model.state.gamma, model2.state.gamma))
 
     def testPersistenceIgnore(self):
-        fname = testfile('testPersistenceIgnore')
+        fname = get_tmpfile('gensim_models_atmodel_testPersistenceIgnore.tst')
         model = atmodel.AuthorTopicModel(corpus, author2doc=author2doc, num_topics=2)
         model.save(fname, ignore='id2word')
         model2 = atmodel.AuthorTopicModel.load(fname)
@@ -518,7 +497,7 @@ def testPersistenceIgnore(self):
         self.assertTrue(model2.id2word is None)
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_atmodel.tst.gz')
         model = self.model
         model.save(fname)
         model2 = self.class_.load(fname, mmap=None)
@@ -533,7 +512,7 @@ def testPersistenceCompressed(self):
         self.assertTrue(np.allclose(jill_topics, jill_topics2))
 
     def testLargeMmap(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_atmodel.tst')
         model = self.model
 
         # simulate storing large arrays separately
@@ -553,7 +532,7 @@ def testLargeMmap(self):
         self.assertTrue(np.allclose(jill_topics, jill_topics2))
 
     def testLargeMmapCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_atmodel.tst.gz')
         model = self.model
 
         # simulate storing large arrays separately
diff --git a/gensim/test/test_big.py b/gensim/test/test_big.py
index abf19c63c7..f422953d18 100644
--- a/gensim/test/test_big.py
+++ b/gensim/test/test_big.py
@@ -12,16 +12,11 @@
 import logging
 import unittest
 import os
-import tempfile
 
 import numpy as np
 
 import gensim
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_big.tst')
+from gensim.test.utils import get_tmpfile
 
 
 class BigCorpus(object):
@@ -50,24 +45,27 @@ class TestLargeData(unittest.TestCase):
 
         def testWord2Vec(self):
             corpus = BigCorpus(words_only=True, num_docs=100000, num_terms=3000000, doc_len=200)
+            tmpf = get_tmpfile('gensim_big.tst')
             model = gensim.models.Word2Vec(corpus, size=300, workers=4)
-            model.save(testfile(), ignore=['syn1'])
+            model.save(tmpf, ignore=['syn1'])
             del model
-            gensim.models.Word2Vec.load(testfile())
+            gensim.models.Word2Vec.load(tmpf)
 
         def testLsiModel(self):
             corpus = BigCorpus(num_docs=50000)
+            tmpf = get_tmpfile('gensim_big.tst')
             model = gensim.models.LsiModel(corpus, num_topics=500, id2word=corpus.dictionary)
-            model.save(testfile())
+            model.save(tmpf)
             del model
-            gensim.models.LsiModel.load(testfile())
+            gensim.models.LsiModel.load(tmpf)
 
         def testLdaModel(self):
             corpus = BigCorpus(num_docs=5000)
+            tmpf = get_tmpfile('gensim_big.tst')
             model = gensim.models.LdaModel(corpus, num_topics=500, id2word=corpus.dictionary)
-            model.save(testfile())
+            model.save(tmpf)
             del model
-            gensim.models.LdaModel.load(testfile())
+            gensim.models.LdaModel.load(tmpf)
 
 
 if __name__ == '__main__':
diff --git a/gensim/test/test_coherencemodel.py b/gensim/test/test_coherencemodel.py
index 787d661f89..523e658e66 100644
--- a/gensim/test/test_coherencemodel.py
+++ b/gensim/test/test_coherencemodel.py
@@ -10,44 +10,26 @@
 
 import logging
 import os
-import tempfile
 import unittest
 from unittest import SkipTest
 import multiprocessing as mp
+from functools import partial
 
 import numpy as np
-from gensim.corpora.dictionary import Dictionary
 from gensim.matutils import argsort
 from gensim.models.coherencemodel import CoherenceModel, BOOLEAN_DOCUMENT_BASED
 from gensim.models.ldamodel import LdaModel
 from gensim.models.wrappers import LdaMallet
 from gensim.models.wrappers import LdaVowpalWabbit
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+from gensim.test.utils import get_tmpfile, common_texts, common_dictionary, common_corpus
 
 
 class TestCoherenceModel(unittest.TestCase):
 
     # set up vars used in testing ("Deerwester" from the web tutorial)
-    texts = [
-        ['human', 'interface', 'computer'],
-        ['survey', 'user', 'computer', 'system', 'response', 'time'],
-        ['eps', 'user', 'interface', 'system'],
-        ['system', 'human', 'system', 'eps'],
-        ['user', 'response', 'time'],
-        ['trees'],
-        ['graph', 'trees'],
-        ['graph', 'minors', 'trees'],
-        ['graph', 'minors', 'survey']
-    ]
-    dictionary = Dictionary(texts)
-
-    @classmethod
-    def setUpClass(cls):
-        cls.corpus = [cls.dictionary.doc2bow(text) for text in cls.texts]
+    texts = common_texts
+    dictionary = common_dictionary
+    corpus = common_corpus
 
     def setUp(self):
         # Suppose given below are the topics which two different LdaModels come up with.
@@ -215,23 +197,20 @@ def testErrors(self):
         )
 
     def testProcesses(self):
-        cpu = mp.cpu_count()
-        get_model = lambda p: CoherenceModel(
-            topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass', processes=p,
+        get_model = partial(CoherenceModel,
+            topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass'
         )
 
-        model = CoherenceModel(
-            topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass',
-        )
-        self.assertEqual(model.processes, cpu - 1)
+        model, used_cpus = get_model(), mp.cpu_count() - 1
+        self.assertEqual(model.processes, used_cpus)
         for p in range(-2, 1):
-            self.assertEqual(get_model(p).processes, cpu - 1)
+            self.assertEqual(get_model(processes=p).processes, used_cpus)
 
         for p in range(1, 4):
-            self.assertEqual(get_model(p).processes, p)
+            self.assertEqual(get_model(processes=p).processes, p)
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_coherence.tst')
         model = CoherenceModel(
             topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass'
         )
@@ -240,7 +219,7 @@ def testPersistence(self):
         self.assertTrue(model.get_coherence() == model2.get_coherence())
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_coherence.tst.gz')
         model = CoherenceModel(
             topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass'
         )
@@ -249,7 +228,7 @@ def testPersistenceCompressed(self):
         self.assertTrue(model.get_coherence() == model2.get_coherence())
 
     def testPersistenceAfterProbabilityEstimationUsingCorpus(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_coherence.tst')
         model = CoherenceModel(
             topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass'
         )
@@ -260,7 +239,7 @@ def testPersistenceAfterProbabilityEstimationUsingCorpus(self):
         self.assertTrue(model.get_coherence() == model2.get_coherence())
 
     def testPersistenceAfterProbabilityEstimationUsingTexts(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_coherence.tst')
         model = CoherenceModel(
             topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v'
         )
diff --git a/gensim/test/test_corpora.py b/gensim/test/test_corpora.py
index a990c6be94..4ddc16e0cf 100644
--- a/gensim/test/test_corpora.py
+++ b/gensim/test/test_corpora.py
@@ -23,18 +23,7 @@
                             ucicorpus, malletcorpus, textcorpus, indexedcorpus)
 from gensim.interfaces import TransformedCorpus
 from gensim.utils import to_unicode
-
-# needed because sample data files are located in the same folder
-module_path = os.path.dirname(__file__)
-
-
-def datapath(fname):
-    return os.path.join(module_path, 'test_data', fname)
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_corpus.tst')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class DummyTransformer(object):
@@ -61,7 +50,7 @@ def run(self, result=None):
 
     def tearDown(self):
         # remove all temporary test files
-        fname = testfile()
+        fname = get_tmpfile('gensim_corpus.tst')
         extensions = ['', '', '.bz2', '.gz', '.index', '.vocab']
         for ext in itertools.permutations(extensions, 2):
             try:
@@ -93,13 +82,14 @@ def test_len(self):
         self.assertEqual(len(corpus), 9)
 
     def test_empty_input(self):
-        with open(testfile(), 'w') as f:
+        tmpf = get_tmpfile('gensim_corpus.tst')
+        with open(tmpf, 'w') as f:
             f.write('')
 
-        with open(testfile() + '.vocab', 'w') as f:
+        with open(tmpf + '.vocab', 'w') as f:
             f.write('')
 
-        corpus = self.corpus_class(testfile())
+        corpus = self.corpus_class(tmpf)
         self.assertEqual(len(corpus), 0)
 
         docs = list(corpus)
@@ -107,22 +97,24 @@ def test_empty_input(self):
 
     def test_save(self):
         corpus = self.TEST_CORPUS
+        tmpf = get_tmpfile('gensim_corpus.tst')
 
         # make sure the corpus can be saved
-        self.corpus_class.save_corpus(testfile(), corpus)
+        self.corpus_class.save_corpus(tmpf, corpus)
 
         # and loaded back, resulting in exactly the same corpus
-        corpus2 = list(self.corpus_class(testfile()))
+        corpus2 = list(self.corpus_class(tmpf))
         self.assertEqual(corpus, corpus2)
 
     def test_serialize(self):
         corpus = self.TEST_CORPUS
+        tmpf = get_tmpfile('gensim_corpus.tst')
 
         # make sure the corpus can be saved
-        self.corpus_class.serialize(testfile(), corpus)
+        self.corpus_class.serialize(tmpf, corpus)
 
         # and loaded back, resulting in exactly the same corpus
-        corpus2 = self.corpus_class(testfile())
+        corpus2 = self.corpus_class(tmpf)
         self.assertEqual(corpus, list(corpus2))
 
         # make sure the indexing corpus[i] works
@@ -137,9 +129,10 @@ def test_serialize(self):
 
     def test_serialize_compressed(self):
         corpus = self.TEST_CORPUS
+        tmpf = get_tmpfile('gensim_corpus.tst')
 
         for extension in ['.gz', '.bz2']:
-            fname = testfile() + extension
+            fname = tmpf + extension
             # make sure the corpus can be saved
             self.corpus_class.serialize(fname, corpus)
 
@@ -252,7 +245,7 @@ def setUp(self):
 
     def test_save_format_for_dtm(self):
         corpus = [[(1, 1.0)], [], [(0, 5.0), (2, 1.0)], []]
-        test_file = testfile()
+        test_file = get_tmpfile('gensim_corpus.tst')
         self.corpus_class.save_corpus(test_file, corpus)
         with open(test_file) as f:
             for line in f:
@@ -493,7 +486,7 @@ def test_non_trivial_structure(self):
         .
         ├── 0.txt
         ├── a_folder
-        │   └── 1.txt
+        │   └── 1.txt
         └── b_folder
             ├── 2.txt
             ├── 3.txt
diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py
index f6c7d8b43c..e0b8d1e426 100644
--- a/gensim/test/test_corpora_dictionary.py
+++ b/gensim/test/test_corpora_dictionary.py
@@ -10,7 +10,6 @@
 
 from collections import Mapping
 import logging
-import tempfile
 import unittest
 import codecs
 import os
@@ -20,31 +19,14 @@
 import gensim
 from gensim.corpora import Dictionary
 from gensim.utils import to_utf8
+from gensim.test.utils import get_tmpfile, common_texts
 from six import PY3
 from six.moves import zip
 
 
-# sample data files are located in the same folder
-module_path = os.path.dirname(__file__)
-
-
-def get_tmpfile(suffix):
-    return os.path.join(tempfile.gettempdir(), suffix)
-
-
 class TestDictionary(unittest.TestCase):
     def setUp(self):
-        self.texts = [
-                ['human', 'interface', 'computer'],
-                ['survey', 'user', 'computer', 'system', 'response', 'time'],
-                ['eps', 'user', 'interface', 'system'],
-                ['system', 'human', 'system', 'eps'],
-                ['user', 'response', 'time'],
-                ['trees'],
-                ['graph', 'trees'],
-                ['graph', 'minors', 'trees'],
-                ['graph', 'minors', 'survey']
-        ]
+        self.texts = common_texts
 
     def testDocFreqOneDoc(self):
         texts = [['human', 'interface', 'computer']]
diff --git a/gensim/test/test_corpora_hashdictionary.py b/gensim/test/test_corpora_hashdictionary.py
index 808246dc59..74f28eb0bb 100644
--- a/gensim/test/test_corpora_hashdictionary.py
+++ b/gensim/test/test_corpora_hashdictionary.py
@@ -9,35 +9,17 @@
 
 
 import logging
-import tempfile
 import unittest
 import os
 import zlib
 
 from gensim.corpora.hashdictionary import HashDictionary
-
-
-# sample data files are located in the same folder
-module_path = os.path.dirname(__file__)
-
-
-def get_tmpfile(suffix):
-    return os.path.join(tempfile.gettempdir(), suffix)
+from gensim.test.utils import get_tmpfile, common_texts
 
 
 class TestHashDictionary(unittest.TestCase):
     def setUp(self):
-        self.texts = [
-                ['human', 'interface', 'computer'],
-                ['survey', 'user', 'computer', 'system', 'response', 'time'],
-                ['eps', 'user', 'interface', 'system'],
-                ['system', 'human', 'system', 'eps'],
-                ['user', 'response', 'time'],
-                ['trees'],
-                ['graph', 'trees'],
-                ['graph', 'minors', 'trees'],
-                ['graph', 'minors', 'survey']
-        ]
+        self.texts = common_texts
 
     def testDocFreqOneDoc(self):
         texts = [['human', 'interface', 'computer']]
diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py
index 6feeab3bd2..0d49c9e2e3 100644
--- a/gensim/test/test_doc2vec.py
+++ b/gensim/test/test_doc2vec.py
@@ -14,7 +14,6 @@
 import logging
 import unittest
 import os
-import tempfile
 
 from six.moves import zip as izip
 from collections import namedtuple
@@ -24,9 +23,7 @@
 
 from gensim import utils
 from gensim.models import doc2vec, keyedvectors
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
+from gensim.test.utils import datapath, get_tmpfile, common_texts as raw_sentences
 
 
 class DocsLeeCorpus(object):
@@ -49,56 +46,42 @@ def __iter__(self):
 
 list_corpus = list(DocsLeeCorpus())
 
-raw_sentences = [
-        ['human', 'interface', 'computer'],
-        ['survey', 'user', 'computer', 'system', 'response', 'time'],
-        ['eps', 'user', 'interface', 'system'],
-        ['system', 'human', 'system', 'eps'],
-        ['user', 'response', 'time'],
-        ['trees'],
-        ['graph', 'trees'],
-        ['graph', 'minors', 'trees'],
-        ['graph', 'minors', 'survey']
-    ]
 
 sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(raw_sentences)]
 
 
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.tst')
-
-
 def load_on_instance():
     # Save and load a Doc2Vec Model on instance for test
+    tmpf = get_tmpfile('gensim_doc2vec.tst')  # written by save(), then re-read through an instance-level load()
     model = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1)
-    model.save(testfile())
+    model.save(tmpf)
     model = doc2vec.Doc2Vec()  # should fail at this point
-    return model.load(testfile())
+    return model.load(tmpf)
 
 
 class TestDoc2VecModel(unittest.TestCase):
     def test_persistence(self):
         """Test storing/loading the entire model."""
+        tmpf = get_tmpfile('gensim_doc2vec.tst')
         model = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1)
-        model.save(testfile())
-        self.models_equal(model, doc2vec.Doc2Vec.load(testfile()))
+        model.save(tmpf)
+        self.models_equal(model, doc2vec.Doc2Vec.load(tmpf))
 
     def testPersistenceWord2VecFormat(self):
         """Test storing the entire model in word2vec format."""
         model = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1)
         # test saving both document and word embedding
-        test_doc_word = os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.dw')
+        test_doc_word = get_tmpfile('gensim_doc2vec.dw')
         model.save_word2vec_format(test_doc_word, doctag_vec=True, word_vec=True, binary=True)
         binary_model_dv = keyedvectors.KeyedVectors.load_word2vec_format(test_doc_word, binary=True)
         self.assertEqual(len(model.wv.vocab) + len(model.docvecs), len(binary_model_dv.vocab))
         # test saving document embedding only
-        test_doc = os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.d')
+        test_doc = get_tmpfile('gensim_doc2vec.d')
         model.save_word2vec_format(test_doc, doctag_vec=True, word_vec=False, binary=True)
         binary_model_dv = keyedvectors.KeyedVectors.load_word2vec_format(test_doc, binary=True)
         self.assertEqual(len(model.docvecs), len(binary_model_dv.vocab))
         # test saving word embedding only
-        test_word = os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.w')
+        test_word = get_tmpfile('gensim_doc2vec.w')
         model.save_word2vec_format(test_word, doctag_vec=False, word_vec=True, binary=True)
         binary_model_dv = keyedvectors.KeyedVectors.load_word2vec_format(test_word, binary=True)
         self.assertEqual(len(model.wv.vocab), len(binary_model_dv.vocab))
@@ -106,21 +89,23 @@ def testPersistenceWord2VecFormat(self):
     def test_unicode_in_doctag(self):
         """Test storing document vectors of a model with unicode titles."""
         model = doc2vec.Doc2Vec(DocsLeeCorpus(unicode_tags=True), min_count=1)
+        tmpf = get_tmpfile('gensim_doc2vec.tst')
         try:
-            model.save_word2vec_format(testfile(), doctag_vec=True, word_vec=True, binary=True)
+            model.save_word2vec_format(tmpf, doctag_vec=True, word_vec=True, binary=True)
         except UnicodeEncodeError:
             self.fail('Failed storing unicode title.')
 
     def test_load_mmap(self):
         """Test storing/loading the entire model."""
         model = doc2vec.Doc2Vec(sentences, min_count=1)
+        tmpf = get_tmpfile('gensim_doc2vec.tst')
 
         # test storing the internal arrays into separate files
-        model.save(testfile(), sep_limit=0)
-        self.models_equal(model, doc2vec.Doc2Vec.load(testfile()))
+        model.save(tmpf, sep_limit=0)
+        self.models_equal(model, doc2vec.Doc2Vec.load(tmpf))
 
         # make sure mmaping the arrays back works, too
-        self.models_equal(model, doc2vec.Doc2Vec.load(testfile(), mmap='r'))
+        self.models_equal(model, doc2vec.Doc2Vec.load(tmpf, mmap='r'))
 
     def test_int_doctags(self):
         """Test doc2vec doctag alternatives"""
@@ -217,8 +202,9 @@ def model_sanity(self, model, keep_training=True):
 
         # keep training after save
         if keep_training:
-            model.save(testfile())
-            loaded = doc2vec.Doc2Vec.load(testfile())
+            tmpf = get_tmpfile('gensim_doc2vec.tst')
+            model.save(tmpf)
+            loaded = doc2vec.Doc2Vec.load(tmpf)
             loaded.train(sentences, total_examples=loaded.corpus_count, epochs=loaded.iter)
 
     def test_training(self):
diff --git a/gensim/test/test_dtm.py b/gensim/test/test_dtm.py
index 231bbb1932..a52766b8cd 100644
--- a/gensim/test/test_dtm.py
+++ b/gensim/test/test_dtm.py
@@ -13,11 +13,7 @@
 import sys
 import unittest
 from gensim import corpora
-
-
-# needed because sample data files are located in the same folder
-module_path = os.path.dirname(__file__)
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
+from gensim.test.utils import datapath
 
 
 class TestDtmModel(unittest.TestCase):
diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py
index 710c5a500d..d56272b4e1 100644
--- a/gensim/test/test_fasttext.py
+++ b/gensim/test/test_fasttext.py
@@ -3,7 +3,6 @@
 
 import logging
 import unittest
-import tempfile
 import os
 import struct
 
@@ -14,9 +13,8 @@
 from gensim.models.fasttext import FastText as FT_gensim
 from gensim.models.wrappers.fasttext import FastTextKeyedVectors
 from gensim.models.wrappers.fasttext import FastText as FT_wrapper
+from gensim.test.utils import datapath, get_tmpfile, common_texts as sentences
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
 logger = logging.getLogger(__name__)
 
 IS_WIN32 = (os.name == "nt") and (struct.calcsize('P') * 8 == 32)
@@ -31,18 +29,6 @@ def __iter__(self):
 
 list_corpus = list(LeeCorpus())
 
-sentences = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-
 new_sentences = [
     ['computer', 'artificial', 'intelligence'],
     ['artificial', 'trees'],
@@ -53,11 +39,6 @@ def __iter__(self):
 ]
 
 
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_fasttext.tst')
-
-
 class TestFastTextModel(unittest.TestCase):
 
     def setUp(self):
@@ -113,29 +94,31 @@ def models_equal(self, model, model2):
 
     @unittest.skipIf(IS_WIN32, "avoid memory error with Appveyor x32")
     def test_persistence(self):
+        tmpf = get_tmpfile('gensim_fasttext.tst')
         model = FT_gensim(sentences, min_count=1)
-        model.save(testfile())
-        self.models_equal(model, FT_gensim.load(testfile()))
+        model.save(tmpf)
+        self.models_equal(model, FT_gensim.load(tmpf))
         #  test persistence of the KeyedVectors of a model
         wv = model.wv
-        wv.save(testfile())
-        loaded_wv = FastTextKeyedVectors.load(testfile())
+        wv.save(tmpf)
+        loaded_wv = FastTextKeyedVectors.load(tmpf)
         self.assertTrue(np.allclose(wv.syn0_ngrams, loaded_wv.syn0_ngrams))
         self.assertEqual(len(wv.vocab), len(loaded_wv.vocab))
         self.assertEqual(len(wv.ngrams), len(loaded_wv.ngrams))
 
     @unittest.skipIf(IS_WIN32, "avoid memory error with Appveyor x32")
     def test_norm_vectors_not_saved(self):
+        tmpf = get_tmpfile('gensim_fasttext.tst')
         model = FT_gensim(sentences, min_count=1)
         model.init_sims()
-        model.save(testfile())
-        loaded_model = FT_gensim.load(testfile())
+        model.save(tmpf)
+        loaded_model = FT_gensim.load(tmpf)
         self.assertTrue(loaded_model.wv.syn0norm is None)
         self.assertTrue(loaded_model.wv.syn0_ngrams_norm is None)
 
         wv = model.wv
-        wv.save(testfile())
-        loaded_kv = FastTextKeyedVectors.load(testfile())
+        wv.save(tmpf)
+        loaded_kv = FastTextKeyedVectors.load(tmpf)
         self.assertTrue(loaded_kv.syn0norm is None)
         self.assertTrue(loaded_kv.syn0_ngrams_norm is None)
 
@@ -362,8 +345,9 @@ def test_cbow_hs_against_wrapper(self):
             logger.info("FT_HOME env variable not set, skipping test")
             return
 
+        tmpf = get_tmpfile('gensim_fasttext.tst')
         model_wrapper = FT_wrapper.train(ft_path=self.ft_path, corpus_file=datapath('lee_background.cor'),
-            output_file=testfile(), model='cbow', size=50, alpha=0.05, window=5, min_count=5, word_ngrams=1,
+            output_file=tmpf, model='cbow', size=50, alpha=0.05, window=5, min_count=5, word_ngrams=1,
             loss='hs', sample=1e-3, negative=0, iter=5, min_n=3, max_n=6, sorted_vocab=1, threads=12)
 
         model_gensim = FT_gensim(size=50, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0,
@@ -382,8 +366,9 @@ def test_sg_hs_against_wrapper(self):
             logger.info("FT_HOME env variable not set, skipping test")
             return
 
+        tmpf = get_tmpfile('gensim_fasttext.tst')
         model_wrapper = FT_wrapper.train(ft_path=self.ft_path, corpus_file=datapath('lee_background.cor'),
-            output_file=testfile(), model='skipgram', size=50, alpha=0.025, window=5, min_count=5, word_ngrams=1,
+            output_file=tmpf, model='skipgram', size=50, alpha=0.025, window=5, min_count=5, word_ngrams=1,
             loss='hs', sample=1e-3, negative=0, iter=5, min_n=3, max_n=6, sorted_vocab=1, threads=12)
 
         model_gensim = FT_gensim(size=50, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=1, negative=0,
@@ -411,9 +396,10 @@ def test_online_learning(self):
         self.assertTrue('tif' in model_hs.wv.ngrams)  # ngram added because of the word `artificial`
 
     def test_online_learning_after_save(self):
+        tmpf = get_tmpfile('gensim_fasttext.tst')
         model_neg = FT_gensim(sentences, size=10, min_count=0, seed=42, hs=0, negative=5)
-        model_neg.save(testfile())
-        model_neg = FT_gensim.load(testfile())
+        model_neg.save(tmpf)
+        model_neg = FT_gensim.load(tmpf)
         self.assertTrue(len(model_neg.wv.vocab), 12)
         self.assertTrue(len(model_neg.wv.ngrams), 202)
         model_neg.build_vocab(new_sentences, update=True)  # update vocab
diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py
index 77e10bdf99..cc6b9b9519 100644
--- a/gensim/test/test_fasttext_wrapper.py
+++ b/gensim/test/test_fasttext_wrapper.py
@@ -11,23 +11,16 @@
 import logging
 import unittest
 import os
-import tempfile
 
 import numpy
 
 from gensim.models.wrappers import fasttext
 from gensim.models import keyedvectors
+from gensim.test.utils import datapath, get_tmpfile
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
 logger = logging.getLogger(__name__)
 
 
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_fasttext.tst')
-
-
 class TestFastText(unittest.TestCase):
     def setUp(self):
         ft_home = os.environ.get('FT_HOME', None)
@@ -55,8 +48,9 @@ def testTraining(self):
             logger.info("FT_HOME env variable not set, skipping test")
             return  # Use self.skipTest once python < 2.7 is no longer supported
         vocab_size, model_size = 1763, 10
+        tmpf = get_tmpfile('gensim_fasttext_wrapper.tst')
         trained_model = fasttext.FastText.train(
-            self.ft_path, self.corpus_file, size=model_size, output_file=testfile()
+            self.ft_path, self.corpus_file, size=model_size, output_file=tmpf
         )
 
         self.assertEqual(trained_model.wv.syn0.shape, (vocab_size, model_size))
@@ -65,20 +59,21 @@ def testTraining(self):
         self.model_sanity(trained_model)
 
         # Tests temporary training files deleted
-        self.assertFalse(os.path.exists('%s.bin' % testfile()))
+        self.assertFalse(os.path.exists('%s.bin' % tmpf))
 
     def testMinCount(self):
         """Tests words with frequency less than `min_count` absent from vocab"""
         if self.ft_path is None:
             logger.info("FT_HOME env variable not set, skipping test")
             return  # Use self.skipTest once python < 2.7 is no longer supported
+        tmpf = get_tmpfile('gensim_fasttext_wrapper.tst')
         test_model_min_count_5 = fasttext.FastText.train(
-            self.ft_path, self.corpus_file, output_file=testfile(), size=10, min_count=5
+            self.ft_path, self.corpus_file, output_file=tmpf, size=10, min_count=5
         )
         self.assertTrue('forests' not in test_model_min_count_5.wv.vocab)
 
         test_model_min_count_1 = fasttext.FastText.train(
-            self.ft_path, self.corpus_file, output_file=testfile(), size=10, min_count=1
+            self.ft_path, self.corpus_file, output_file=tmpf, size=10, min_count=1
         )
         self.assertTrue('forests' in test_model_min_count_1.wv.vocab)
 
@@ -87,8 +82,9 @@ def testModelSize(self):
         if self.ft_path is None:
             logger.info("FT_HOME env variable not set, skipping test")
             return  # Use self.skipTest once python < 2.7 is no longer supported
+        tmpf = get_tmpfile('gensim_fasttext_wrapper.tst')
         test_model_size_20 = fasttext.FastText.train(
-            self.ft_path, self.corpus_file, output_file=testfile(), size=20
+            self.ft_path, self.corpus_file, output_file=tmpf, size=20
         )
         self.assertEqual(test_model_size_20.vector_size, 20)
         self.assertEqual(test_model_size_20.wv.syn0.shape[1], 20)
@@ -96,24 +92,26 @@ def testModelSize(self):
 
     def testPersistence(self):
         """Test storing/loading the entire model."""
-        self.test_model.save(testfile())
-        loaded = fasttext.FastText.load(testfile())
+        tmpf = get_tmpfile('gensim_fasttext_wrapper.tst')
+        self.test_model.save(tmpf)
+        loaded = fasttext.FastText.load(tmpf)
         self.models_equal(self.test_model, loaded)
 
-        self.test_model.save(testfile(), sep_limit=0)
-        self.models_equal(self.test_model, fasttext.FastText.load(testfile()))
+        self.test_model.save(tmpf, sep_limit=0)
+        self.models_equal(self.test_model, fasttext.FastText.load(tmpf))
 
     def testNormalizedVectorsNotSaved(self):
         """Test syn0norm/syn0_ngrams_norm aren't saved in model file"""
+        tmpf = get_tmpfile('gensim_fasttext_wrapper.tst')
         self.test_model.init_sims()
-        self.test_model.save(testfile())
-        loaded = fasttext.FastText.load(testfile())
+        self.test_model.save(tmpf)
+        loaded = fasttext.FastText.load(tmpf)
         self.assertTrue(loaded.wv.syn0norm is None)
         self.assertTrue(loaded.wv.syn0_ngrams_norm is None)
 
         wv = self.test_model.wv
-        wv.save(testfile())
-        loaded_kv = keyedvectors.KeyedVectors.load(testfile())
+        wv.save(tmpf)
+        loaded_kv = keyedvectors.KeyedVectors.load(tmpf)
         self.assertTrue(loaded_kv.syn0norm is None)
         self.assertTrue(loaded_kv.syn0_ngrams_norm is None)
 
diff --git a/gensim/test/test_glove2word2vec.py b/gensim/test/test_glove2word2vec.py
index 07c46a7332..44548529ac 100644
--- a/gensim/test/test_glove2word2vec.py
+++ b/gensim/test/test_glove2word2vec.py
@@ -10,26 +10,18 @@
 import unittest
 import os
 import sys
-import tempfile
 
 import numpy
 import gensim
 
 from gensim.utils import check_output
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-def testfile():
-    # temporary model will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'glove2word2vec.test')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestGlove2Word2Vec(unittest.TestCase):
     def setUp(self):
         self.datapath = datapath('test_glove.txt')
-        self.output_file = testfile()
+        self.output_file = get_tmpfile('glove2word2vec.test')
 
     def testConversion(self):
         check_output(args=[
diff --git a/gensim/test/test_hdpmodel.py b/gensim/test/test_hdpmodel.py
index b3cf8bdde1..a1ed0d6dcd 100644
--- a/gensim/test/test_hdpmodel.py
+++ b/gensim/test/test_hdpmodel.py
@@ -11,39 +11,16 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
 
 from gensim.corpora import mmcorpus, Dictionary
 from gensim.models import hdpmodel
 from gensim.test import basetmtests
+from gensim.test.utils import datapath, common_texts
 
 import numpy as np
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+dictionary = Dictionary(common_texts)
+corpus = [dictionary.doc2bow(text) for text in common_texts]
 
 
 class TestHdpModel(unittest.TestCase, basetmtests.TestBaseTopicModel):
diff --git a/gensim/test/test_keras_integration.py b/gensim/test/test_keras_integration.py
index 41bc74a967..3d3abd6f3b 100644
--- a/gensim/test/test_keras_integration.py
+++ b/gensim/test/test_keras_integration.py
@@ -1,5 +1,4 @@
 import unittest
-import os
 import numpy as np
 from gensim.models import word2vec
 
@@ -21,25 +20,12 @@
 except ImportError:
     raise unittest.SkipTest("Test requires Keras to be installed, which is not available")
 
-sentences = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
+from gensim.test.utils import common_texts
 
 
 class TestKerasWord2VecWrapper(unittest.TestCase):
     def setUp(self):
-        self.model_cos_sim = word2vec.Word2Vec(sentences, size=100, min_count=1, hs=1)
+        self.model_cos_sim = word2vec.Word2Vec(common_texts, size=100, min_count=1, hs=1)
         # self.model_twenty_ng = word2vec.Word2Vec(word2vec.LineSentence(datapath('20_newsgroup_keras_w2v_data.txt')), min_count=1)
         self.model_twenty_ng = word2vec.Word2Vec(min_count=1)
 
diff --git a/gensim/test/test_ldamallet_wrapper.py b/gensim/test/test_ldamallet_wrapper.py
index 5ed4486e16..b780ad42f6 100644
--- a/gensim/test/test_ldamallet_wrapper.py
+++ b/gensim/test/test_ldamallet_wrapper.py
@@ -13,7 +13,6 @@
 import unittest
 import os
 import os.path
-import tempfile
 
 import numpy as np
 
@@ -22,30 +21,10 @@
 from gensim import matutils
 from gensim.models import ldamodel
 from gensim.test import basetmtests
+from gensim.test.utils import datapath, get_tmpfile, common_texts
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+dictionary = Dictionary(common_texts)
+corpus = [dictionary.doc2bow(text) for text in common_texts]
 
 
 class TestLdaMallet(unittest.TestCase, basetmtests.TestBaseTopicModel):
@@ -125,7 +104,7 @@ def testMallet2Model(self):
     def testPersistence(self):
         if not self.mallet_path:
             return
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_lda_mallet.tst')
         model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100)
         model.save(fname)
         model2 = ldamallet.LdaMallet.load(fname)
@@ -137,7 +116,7 @@ def testPersistence(self):
     def testPersistenceCompressed(self):
         if not self.mallet_path:
             return
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_lda_mallet.tst.gz')
         model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100)
         model.save(fname)
         model2 = ldamallet.LdaMallet.load(fname, mmap=None)
@@ -149,7 +128,7 @@ def testPersistenceCompressed(self):
     def testLargeMmap(self):
         if not self.mallet_path:
             return
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_lda_mallet.tst')
         model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100)
 
         # simulate storing large arrays separately
@@ -166,7 +145,7 @@ def testLargeMmap(self):
     def testLargeMmapCompressed(self):
         if not self.mallet_path:
             return
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_lda_mallet.tst.gz')
         model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100)
 
         # simulate storing large arrays separately
diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py
index c1d35c2661..df9e0c1c72 100644
--- a/gensim/test/test_ldamodel.py
+++ b/gensim/test/test_ldamodel.py
@@ -11,9 +11,6 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
 import numbers
 
 import six
@@ -23,31 +20,10 @@
 from gensim.models import ldamodel, ldamulticore
 from gensim import matutils, utils
 from gensim.test import basetmtests
+from gensim.test.utils import datapath, get_tmpfile, common_texts
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile(test_fname=''):
-    # temporary data will be stored to this file
-    fname = 'gensim_models_' + test_fname + '.tst'
-    return os.path.join(tempfile.gettempdir(), fname)
+dictionary = Dictionary(common_texts)
+corpus = [dictionary.doc2bow(text) for text in common_texts]
 
 
 def testRandomState():
@@ -348,22 +324,24 @@ def testPasses(self):
         # construct what we expect when passes aren't involved
         test_rhots = list()
         model = self.class_(id2word=dictionary, chunksize=1, num_topics=2)
-        final_rhot = lambda: pow(model.offset + (1 * model.num_updates) / model.chunksize, -model.decay)
+
+        def final_rhot(model):  # explicit arg: `model` is rebound inside the loops below
+            return pow(model.offset + (1 * model.num_updates) / model.chunksize, -model.decay)
 
         # generate 5 updates to test rhot on
         for x in range(5):
             model.update(self.corpus)
-            test_rhots.append(final_rhot())
+            test_rhots.append(final_rhot(model))
 
         for passes in [1, 5, 10, 50, 100]:
             model = self.class_(id2word=dictionary, chunksize=1, num_topics=2, passes=passes)
-            self.assertEqual(final_rhot(), 1.0)
+            self.assertEqual(final_rhot(model), 1.0)
             # make sure the rhot matches the test after each update
             for test_rhot in test_rhots:
                 model.update(self.corpus)
 
                 msg = ", ".join(str(x) for x in [passes, model.num_updates, model.state.numdocs])
-                self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg)
+                self.assertAlmostEqual(final_rhot(model), test_rhot, msg=msg)
 
             self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots))
             self.assertEqual(model.num_updates, len(corpus) * len(test_rhots))
@@ -402,7 +380,7 @@ def testPasses(self):
     #         self.assertTrue(passed)
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_lda.tst')
         model = self.model
         model.save(fname)
         model2 = self.class_.load(fname)
@@ -425,7 +403,7 @@ def testModelCompatibilityWithPythonVersions(self):
         self.assertEqual(set(id2word_2_7.keys()), set(id2word_3_5.keys()))
 
     def testPersistenceIgnore(self):
-        fname = testfile('testPersistenceIgnore')
+        fname = get_tmpfile('gensim_models_lda_testPersistenceIgnore.tst')
         model = ldamodel.LdaModel(self.corpus, num_topics=2)
         model.save(fname, ignore='id2word')
         model2 = ldamodel.LdaModel.load(fname)
@@ -436,7 +414,7 @@ def testPersistenceIgnore(self):
         self.assertTrue(model2.id2word is None)
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_lda.tst.gz')
         model = self.model
         model.save(fname)
         model2 = self.class_.load(fname, mmap=None)
@@ -446,7 +424,7 @@ def testPersistenceCompressed(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testLargeMmap(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_lda.tst')
         model = self.model
 
         # simulate storing large arrays separately
@@ -461,7 +439,7 @@ def testLargeMmap(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testLargeMmapCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_lda.tst.gz')
         model = self.model
 
         # simulate storing large arrays separately
@@ -483,7 +461,7 @@ def testRandomStateBackwardCompatibility(self):
             self.assertTrue(isinstance(i[1], six.string_types))
 
         # save back the loaded model using a post-0.13.2 version of Gensim
-        post_0_13_2_fname = testfile('post_0_13_2_model')
+        post_0_13_2_fname = get_tmpfile('gensim_models_lda_post_0_13_2_model.tst')
         model_pre_0_13_2.save(post_0_13_2_fname)
 
         # load a model saved using a post-0.13.2 version of Gensim
diff --git a/gensim/test/test_ldaseqmodel.py b/gensim/test/test_ldaseqmodel.py
index d38c01868c..eac238dcdc 100644
--- a/gensim/test/test_ldaseqmodel.py
+++ b/gensim/test/test_ldaseqmodel.py
@@ -3,17 +3,13 @@
 Tests to check DTM math functions and Topic-Word, Doc-Topic proportions.
 
 """
+import unittest
+import logging
 
 import numpy as np  # for arrays, array broadcasting etc.
 from gensim.models import ldaseqmodel
 from gensim.corpora import Dictionary
-import os.path
-import unittest
-import logging
-
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data/DTM', fname)
+from gensim.test.utils import datapath
 
 
 class TestLdaSeq(unittest.TestCase):
@@ -203,7 +199,7 @@ def setUp(self):
             ['bank', 'loan', 'sell']
         ]
         # initializing using own LDA sufficient statistics so that we get same results each time.
-        sstats = np.loadtxt(datapath('sstats_test.txt'))
+        sstats = np.loadtxt(datapath('DTM/sstats_test.txt'))
         dictionary = Dictionary(texts)
         corpus = [dictionary.doc2bow(text) for text in texts]
         self.ldaseq = ldaseqmodel.LdaSeqModel(
diff --git a/gensim/test/test_ldavowpalwabbit_wrapper.py b/gensim/test/test_ldavowpalwabbit_wrapper.py
index d14723de59..5f898246e4 100644
--- a/gensim/test/test_ldavowpalwabbit_wrapper.py
+++ b/gensim/test/test_ldavowpalwabbit_wrapper.py
@@ -25,10 +25,7 @@
 
 import gensim.models.wrappers.ldavowpalwabbit as ldavowpalwabbit
 from gensim.models.wrappers.ldavowpalwabbit import LdaVowpalWabbit
-
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
+from gensim.test.utils import datapath
 
 
 # set up vars used in testing ("Deerwester" from the web tutorial)
diff --git a/gensim/test/test_lee.py b/gensim/test/test_lee.py
index 33cce71e52..ff0de9dc3f 100644
--- a/gensim/test/test_lee.py
+++ b/gensim/test/test_lee.py
@@ -27,6 +27,7 @@
 import logging
 import os.path
 import unittest
+from functools import partial
 
 import numpy as np
 
@@ -50,7 +51,7 @@ def setUp(self):
         sim_file = 'similarities0-1.txt'
 
         # read in the corpora
-        latin1 = lambda line: utils.to_unicode(line, encoding='latin1')
+        latin1 = partial(utils.to_unicode, encoding='latin1')
         with utils.smart_open(os.path.join(pre_path, bg_corpus_file)) as f:
             bg_corpus = preprocess_documents(latin1(line) for line in f)
         with utils.smart_open(os.path.join(pre_path, corpus_file)) as f:
diff --git a/gensim/test/test_logentropy_model.py b/gensim/test/test_logentropy_model.py
index 22ca09be0d..bc64f1b2d1 100644
--- a/gensim/test/test_logentropy_model.py
+++ b/gensim/test/test_logentropy_model.py
@@ -11,44 +11,17 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
-
 import numpy as np
 
-from gensim.corpora import mmcorpus, Dictionary
+from gensim.corpora.mmcorpus import MmCorpus
 from gensim.models import logentropy_model
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestLogEntropyModel(unittest.TestCase):
     def setUp(self):
-        self.corpus_small = mmcorpus.MmCorpus(datapath('test_corpus_small.mm'))
-        self.corpus_ok = mmcorpus.MmCorpus(datapath('test_corpus_ok.mm'))
+        self.corpus_small = MmCorpus(datapath('test_corpus_small.mm'))
+        self.corpus_ok = MmCorpus(datapath('test_corpus_ok.mm'))
 
     def testTransform(self):
         # create the transformation model
@@ -66,7 +39,7 @@ def testTransform(self):
         self.assertTrue(np.allclose(transformed, expected))
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_logentry.tst')
         model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
         model.save(fname)
         model2 = logentropy_model.LogEntropyModel.load(fname)
@@ -75,7 +48,7 @@ def testPersistence(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_logentry.tst.gz')
         model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True)
         model.save(fname)
         model2 = logentropy_model.LogEntropyModel.load(fname, mmap=None)
diff --git a/gensim/test/test_lsimodel.py b/gensim/test/test_lsimodel.py
index ed537feaa8..3ff59c1e43 100644
--- a/gensim/test/test_lsimodel.py
+++ b/gensim/test/test_lsimodel.py
@@ -10,50 +10,21 @@
 
 
 import logging
-import os
-import os.path
-import tempfile
 import unittest
 
 import numpy as np
 import scipy.linalg
 
 from gensim import matutils
-from gensim.corpora import mmcorpus, Dictionary
+from gensim.corpora.mmcorpus import MmCorpus
 from gensim.models import lsimodel
 from gensim.test import basetmtests
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-
-
-def datapath(fname):
-    return os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestLsiModel(unittest.TestCase, basetmtests.TestBaseTopicModel):
     def setUp(self):
-        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
+        self.corpus = MmCorpus(datapath('testcorpus.mm'))
         self.model = lsimodel.LsiModel(self.corpus, num_topics=2)
 
     def testTransform(self):
@@ -143,7 +114,7 @@ def testOnlineTransform(self):
         self.assertTrue(np.allclose(abs(vec1), abs(vec2), atol=1e-5))  # the two LSI representations must equal up to sign
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_lsi.tst')
         model = self.model
         model.save(fname)
         model2 = lsimodel.LsiModel.load(fname)
@@ -154,7 +125,7 @@ def testPersistence(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_lsi.tst.gz')
         model = self.model
         model.save(fname)
         model2 = lsimodel.LsiModel.load(fname, mmap=None)
@@ -165,7 +136,7 @@ def testPersistenceCompressed(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testLargeMmap(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models_lsi.tst')
         model = self.model
 
         # test storing the internal arrays into separate files
@@ -182,7 +153,7 @@ def testLargeMmap(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testLargeMmapCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models_lsi.tst.gz')
         model = self.model
 
         # test storing the internal arrays into separate files
diff --git a/gensim/test/test_miislita.py b/gensim/test/test_miislita.py
index dd660f629f..344da1adb3 100644
--- a/gensim/test/test_miislita.py
+++ b/gensim/test/test_miislita.py
@@ -17,22 +17,14 @@
 
 import logging
 import os
-import tempfile
 import unittest
 
 from gensim import utils, corpora, models, similarities
-
-# sample data files are located in the same folder
-module_path = os.path.dirname(__file__)
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
+from gensim.test.utils import datapath, get_tmpfile
 
 logger = logging.getLogger('test_miislita')
 
 
-def get_tmpfile(suffix):
-    return os.path.join(tempfile.gettempdir(), suffix)
-
-
 class CorpusMiislita(corpora.TextCorpus):
     stoplist = set('for a of the and to in on'.split())
 
diff --git a/gensim/test/test_normmodel.py b/gensim/test/test_normmodel.py
index 339680d085..fa7a4096fd 100644
--- a/gensim/test/test_normmodel.py
+++ b/gensim/test/test_normmodel.py
@@ -11,9 +11,6 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
 
 import numpy as np
 from scipy.sparse import csr_matrix
@@ -21,14 +18,7 @@
 
 from gensim.corpora import mmcorpus
 from gensim.models import normmodel
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestNormModel(unittest.TestCase):
@@ -140,7 +130,7 @@ def testInit(self):
         self.assertRaises(ValueError, normmodel.NormModel, self.corpus, 'l0')
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models.tst')
         model = normmodel.NormModel(self.corpus)
         model.save(fname)
         model2 = normmodel.NormModel.load(fname)
@@ -149,7 +139,7 @@ def testPersistence(self):
         self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec)))  # try projecting an empty vector
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models.tst.gz')
         model = normmodel.NormModel(self.corpus)
         model.save(fname)
         model2 = normmodel.NormModel.load(fname, mmap=None)
diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py
index faf0127297..e3a69760ca 100644
--- a/gensim/test/test_phrases.py
+++ b/gensim/test/test_phrases.py
@@ -15,13 +15,11 @@
 
 from gensim import utils
 from gensim.models.phrases import SentenceAnalyzer, Phrases, Phraser, pseudocorpus
+from gensim.test.utils import common_texts
 
 if sys.version_info[0] >= 3:
     unicode = str
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
 
 class TestUtils(unittest.TestCase):
 
@@ -137,17 +135,8 @@ def test_analysis_common_terms_in_between(self):
 
 
 class PhrasesData:
-    sentences = [
-        ['human', 'interface', 'computer'],
-        ['survey', 'user', 'computer', 'system', 'response', 'time'],
-        ['eps', 'user', 'interface', 'system'],
-        ['system', 'human', 'system', 'eps'],
-        ['user', 'response', 'time'],
-        ['trees'],
-        ['graph', 'trees'],
-        ['graph', 'minors', 'trees'],
-        ['graph', 'minors', 'survey'],
-        ['graph', 'minors', 'survey', 'human', 'interface']  # test bigrams within same sentence
+    sentences = common_texts + [
+        ['graph', 'minors', 'survey', 'human', 'interface']  # test bigrams within same sentence
     ]
     unicode_sentences = [[utils.to_unicode(w) for w in sentence] for sentence in sentences]
     common_terms = frozenset()
diff --git a/gensim/test/test_rpmodel.py b/gensim/test/test_rpmodel.py
index 94c1abce84..6d09dbcb84 100644
--- a/gensim/test/test_rpmodel.py
+++ b/gensim/test/test_rpmodel.py
@@ -11,44 +11,18 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
 
 import numpy as np
 
-from gensim.corpora import mmcorpus, Dictionary
+from gensim.corpora.mmcorpus import MmCorpus
 from gensim.models import rpmodel
 from gensim import matutils
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestRpModel(unittest.TestCase):
     def setUp(self):
-        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
+        self.corpus = MmCorpus(datapath('testcorpus.mm'))
 
     def testTransform(self):
         # create the transformation model
@@ -64,7 +38,7 @@ def testTransform(self):
         self.assertTrue(np.allclose(vec, expected))  # transformed entries must be equal up to sign
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models.tst')
         model = rpmodel.RpModel(self.corpus, num_topics=2)
         model.save(fname)
         model2 = rpmodel.RpModel.load(fname)
@@ -74,7 +48,7 @@ def testPersistence(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models.tst.gz')
         model = rpmodel.RpModel(self.corpus, num_topics=2)
         model.save(fname)
         model2 = rpmodel.RpModel.load(fname, mmap=None)
diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py
index 93c0f8a3f7..5c54685c8e 100644
--- a/gensim/test/test_similarities.py
+++ b/gensim/test/test_similarities.py
@@ -12,18 +12,18 @@
 import logging
 import unittest
 import os
-import tempfile
 
 import numpy
 import scipy
 
-from gensim.corpora import Dictionary
 from gensim.models import word2vec
 from gensim.models import doc2vec
 from gensim.models import KeyedVectors
 from gensim.models.wrappers import fasttext
 from gensim import matutils, similarities
 from gensim.models import Word2Vec
+from gensim.test.utils import (datapath, get_tmpfile,
+    common_texts as texts, common_dictionary as dictionary, common_corpus as corpus)
 
 try:
     from pyemd import emd  # noqa:F401
@@ -31,33 +31,9 @@
 except ImportError:
     PYEMD_EXT = False
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
 sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(texts)]
 
 
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_similarities.tst.pkl')
-
-
 class _TestSimilarityABC(object):
     """
     Base class for SparseMatrixSimilarity and MatrixSimilarity unit tests.
@@ -178,7 +154,7 @@ def testPersistency(self):
         if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
             return
 
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         if self.cls == similarities.Similarity:
             index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
         elif self.cls == similarities.WmdSimilarity:
@@ -203,7 +179,7 @@ def testPersistencyCompressed(self):
         if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
             return
 
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
         if self.cls == similarities.Similarity:
             index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
         elif self.cls == similarities.WmdSimilarity:
@@ -228,7 +204,7 @@ def testLarge(self):
         if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
             return
 
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         if self.cls == similarities.Similarity:
             index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
         elif self.cls == similarities.WmdSimilarity:
@@ -255,7 +231,7 @@ def testLargeCompressed(self):
         if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
             return
 
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
         if self.cls == similarities.Similarity:
             index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
         elif self.cls == similarities.WmdSimilarity:
@@ -282,7 +258,7 @@ def testMmap(self):
         if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
             return
 
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         if self.cls == similarities.Similarity:
             index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
         elif self.cls == similarities.WmdSimilarity:
@@ -310,7 +286,7 @@ def testMmapCompressed(self):
         if self.cls == similarities.WmdSimilarity and not PYEMD_EXT:
             return
 
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_similarities.tst.pkl.gz')
         if self.cls == similarities.Similarity:
             index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5)
         elif self.cls == similarities.WmdSimilarity:
@@ -545,7 +521,7 @@ def assertApproxNeighborsMatchExact(self, model, wv, index):
         self.assertEqual(approx_words, exact_words)
 
     def assertIndexSaved(self, index):
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         index.save(fname)
         self.assertTrue(os.path.exists(fname))
         self.assertTrue(os.path.exists(fname + '.d'))
@@ -553,7 +529,7 @@ def assertIndexSaved(self, index):
     def assertLoadedIndexEqual(self, index, model):
         from gensim.similarities.index import AnnoyIndexer
 
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         index.save(fname)
 
         index2 = AnnoyIndexer()
@@ -598,7 +574,7 @@ def testApproxNeighborsMatchExact(self):
         self.assertEqual(approx_words, exact_words)
 
     def testSave(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         self.index.save(fname)
         self.assertTrue(os.path.exists(fname))
         self.assertTrue(os.path.exists(fname + '.d'))
@@ -612,7 +588,7 @@ def testLoadNotExist(self):
     def testSaveLoad(self):
         from gensim.similarities.index import AnnoyIndexer
 
-        fname = testfile()
+        fname = get_tmpfile('gensim_similarities.tst.pkl')
         self.index.save(fname)
 
         self.index2 = AnnoyIndexer()
diff --git a/gensim/test/test_similarity_metrics.py b/gensim/test/test_similarity_metrics.py
index 27066ff09d..2f7b39857c 100644
--- a/gensim/test/test_similarity_metrics.py
+++ b/gensim/test/test_similarity_metrics.py
@@ -16,27 +16,9 @@
 from scipy.sparse import csr_matrix
 import numpy as np
 import math
-import os
-from gensim.corpora import mmcorpus, Dictionary
+from gensim.corpora.mmcorpus import MmCorpus
 from gensim.models import ldamodel
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
+from gensim.test.utils import datapath, common_dictionary, common_corpus
 
 
 class TestIsBow(unittest.TestCase):
@@ -94,9 +76,9 @@ def test_bow(self):
 
 class TestHellinger(unittest.TestCase):
     def setUp(self):
-        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
+        self.corpus = MmCorpus(datapath('testcorpus.mm'))
         self.class_ = ldamodel.LdaModel
-        self.model = self.class_(corpus, id2word=dictionary, num_topics=2, passes=100)
+        self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100)
 
     def test_inputs(self):
 
@@ -146,7 +128,7 @@ def test_distributions(self):
 
         # testing LDA distribution vectors
         np.random.seed(0)
-        model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100)
+        model = self.class_(self.corpus, id2word=common_dictionary, num_topics=2, passes=100)
         lda_vec1 = model[[(1, 2), (2, 3)]]
         lda_vec2 = model[[(2, 2), (1, 3)]]
         result = matutils.hellinger(lda_vec1, lda_vec2)
@@ -156,9 +138,9 @@ def test_distributions(self):
 
 class TestKL(unittest.TestCase):
     def setUp(self):
-        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
+        self.corpus = MmCorpus(datapath('testcorpus.mm'))
         self.class_ = ldamodel.LdaModel
-        self.model = self.class_(corpus, id2word=dictionary, num_topics=2, passes=100)
+        self.model = self.class_(common_corpus, id2word=common_dictionary, num_topics=2, passes=100)
 
     def test_inputs(self):
 
@@ -214,7 +196,7 @@ def test_distributions(self):
 
         # testing LDA distribution vectors
         np.random.seed(0)
-        model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100)
+        model = self.class_(self.corpus, id2word=common_dictionary, num_topics=2, passes=100)
         lda_vec1 = model[[(1, 2), (2, 3)]]
         lda_vec2 = model[[(2, 2), (1, 3)]]
         result = matutils.kullback_leibler(lda_vec1, lda_vec2)
diff --git a/gensim/test/test_sklearn_api.py b/gensim/test/test_sklearn_api.py
index c3209ece10..ff4a6a2202 100644
--- a/gensim/test/test_sklearn_api.py
+++ b/gensim/test/test_sklearn_api.py
@@ -1,6 +1,5 @@
 import unittest
 import numpy
-import os
 import codecs
 import pickle
 
@@ -25,10 +24,7 @@
 from gensim.sklearn_api.phrases import PhrasesTransformer
 from gensim.corpora import mmcorpus, Dictionary
 from gensim import matutils, models
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-datapath_ldaseq = lambda fname: os.path.join(module_path, 'test_data/DTM', fname)
+from gensim.test.utils import datapath, common_texts
 
 texts = [
     ['complier', 'system', 'computer'],
@@ -114,28 +110,9 @@
 
 d2v_sentences = [models.doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(w2v_texts)]
 
-dict_texts = [
-    'human interface computer',
-    'survey user computer system response time',
-    'eps user interface system',
-    'system human system eps',
-    'user response time',
-    'trees',
-    'graph trees',
-    'graph minors trees',
-    'graph minors survey'
-]
+dict_texts = [' '.join(text) for text in common_texts]
 
-phrases_sentences = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey'],
+phrases_sentences = common_texts + [
     ['graph', 'minors', 'survey', 'human', 'interface']
 ]
 
diff --git a/gensim/test/test_text_analysis.py b/gensim/test/test_text_analysis.py
index 93f00ae3a8..83df8ece57 100644
--- a/gensim/test/test_text_analysis.py
+++ b/gensim/test/test_text_analysis.py
@@ -5,6 +5,7 @@
 from gensim.topic_coherence.text_analysis import (
     InvertedIndexAccumulator, WordOccurrenceAccumulator, ParallelWordOccurrenceAccumulator,
     CorpusAccumulator)
+from gensim.test.utils import common_texts
 
 
 class BaseTestCases(object):
@@ -28,18 +29,7 @@ class TextAnalyzerTestBase(unittest.TestCase):
         dictionary.id2token = {v: k for k, v in token2id.items()}
         top_ids = set(token2id.values())
 
-        texts2 = [
-            ['human', 'interface', 'computer'],
-            ['survey', 'user', 'computer', 'system', 'response', 'time'],
-            ['eps', 'user', 'interface', 'system'],
-            ['system', 'human', 'system', 'eps'],
-            ['user', 'response', 'time'],
-            ['trees'],
-            ['graph', 'trees'],
-            ['graph', 'minors', 'trees'],
-            ['graph', 'minors', 'survey'],
-            ['user', 'user']
-        ]
+        texts2 = common_texts + [['user', 'user']]
         dictionary2 = Dictionary(texts2)
         dictionary2.id2token = {v: k for k, v in dictionary2.token2id.items()}
         top_ids2 = set(dictionary2.token2id.values())
diff --git a/gensim/test/test_tfidfmodel.py b/gensim/test/test_tfidfmodel.py
index 65e2939857..c308923c29 100644
--- a/gensim/test/test_tfidfmodel.py
+++ b/gensim/test/test_tfidfmodel.py
@@ -11,43 +11,17 @@
 
 import logging
 import unittest
-import os
-import os.path
-import tempfile
 
 import numpy as np
 
-from gensim.corpora import mmcorpus, Dictionary
+from gensim.corpora.mmcorpus import MmCorpus
 from gensim.models import tfidfmodel
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-# set up vars used in testing ("Deerwester" from the web tutorial)
-texts = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-dictionary = Dictionary(texts)
-corpus = [dictionary.doc2bow(text) for text in texts]
-
-
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
+from gensim.test.utils import datapath, get_tmpfile, common_dictionary, common_corpus
 
 
 class TestTfidfModel(unittest.TestCase):
     def setUp(self):
-        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
+        self.corpus = MmCorpus(datapath('testcorpus.mm'))
 
     def testTransform(self):
         # create the transformation model
@@ -63,19 +37,20 @@ def testTransform(self):
     def testInit(self):
         # create the transformation model by analyzing a corpus
-        # uses the global `corpus`!
+        # uses the shared `common_corpus` imported from gensim.test.utils
-        model1 = tfidfmodel.TfidfModel(corpus)
+        model1 = tfidfmodel.TfidfModel(common_corpus)
+        dfs = common_dictionary.dfs
 
         # make sure the dfs<->idfs transformation works
-        self.assertEqual(model1.dfs, dictionary.dfs)
-        self.assertEqual(model1.idfs, tfidfmodel.precompute_idfs(model1.wglobal, dictionary.dfs, len(corpus)))
+        self.assertEqual(model1.dfs, dfs)
+        self.assertEqual(model1.idfs, tfidfmodel.precompute_idfs(model1.wglobal, dfs, len(common_corpus)))
 
         # create the transformation model by directly supplying a term->docfreq
-        # mapping from the global var `dictionary`.
+        # mapping from the shared `common_dictionary` (gensim.test.utils).
-        model2 = tfidfmodel.TfidfModel(dictionary=dictionary)
+        model2 = tfidfmodel.TfidfModel(dictionary=common_dictionary)
         self.assertEqual(model1.idfs, model2.idfs)
 
     def testPersistence(self):
-        fname = testfile()
+        fname = get_tmpfile('gensim_models.tst')
         model = tfidfmodel.TfidfModel(self.corpus, normalize=True)
         model.save(fname)
         model2 = tfidfmodel.TfidfModel.load(fname)
@@ -84,7 +59,7 @@ def testPersistence(self):
         self.assertTrue(np.allclose(model[tstvec], model2[tstvec]))  # try projecting an empty vector
 
     def testPersistenceCompressed(self):
-        fname = testfile() + '.gz'
+        fname = get_tmpfile('gensim_models.tst.gz')
         model = tfidfmodel.TfidfModel(self.corpus, normalize=True)
         model.save(fname)
         model2 = tfidfmodel.TfidfModel.load(fname, mmap=None)
diff --git a/gensim/test/test_tmdiff.py b/gensim/test/test_tmdiff.py
index 67ba174361..f49c930a63 100644
--- a/gensim/test/test_tmdiff.py
+++ b/gensim/test/test_tmdiff.py
@@ -8,25 +8,14 @@
 import unittest
 import numpy as np
 
-from gensim.corpora import Dictionary
 from gensim.models import LdaModel
+from gensim.test.utils import common_dictionary, common_corpus
 
 
 class TestLdaDiff(unittest.TestCase):
     def setUp(self):
-        texts = [
-            ['human', 'interface', 'computer'],
-            ['survey', 'user', 'computer', 'system', 'response', 'time'],
-            ['eps', 'user', 'interface', 'system'],
-            ['system', 'human', 'system', 'eps'],
-            ['user', 'response', 'time'],
-            ['trees'],
-            ['graph', 'trees'],
-            ['graph', 'minors', 'trees'],
-            ['graph', 'minors', 'survey'],
-        ]
-        self.dictionary = Dictionary(texts)
-        self.corpus = [self.dictionary.doc2bow(text) for text in texts]
+        self.dictionary = common_dictionary
+        self.corpus = common_corpus
         self.num_topics = 5
         self.n_ann_terms = 10
         self.model = LdaModel(corpus=self.corpus, id2word=self.dictionary, num_topics=self.num_topics, passes=10)
diff --git a/gensim/test/test_translation_matrix.py b/gensim/test/test_translation_matrix.py
index b6c24d1c4b..2c68f2c5c1 100644
--- a/gensim/test/test_translation_matrix.py
+++ b/gensim/test/test_translation_matrix.py
@@ -1,25 +1,17 @@
 #!/usr/bin/env python
 # encoding: utf-8
-import os
+from collections import namedtuple
 import unittest
-import tempfile
-import numpy as np
 import math
 
+import numpy as np
+
 from scipy.spatial.distance import cosine
-from collections import namedtuple
 from gensim.models.doc2vec import Doc2Vec
 from gensim import utils
 from gensim.models import translation_matrix
 from gensim.models import KeyedVectors
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-def temp_save_file():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'transmat-en-it.pkl')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestTranslationMatrix(unittest.TestCase):
@@ -45,11 +37,13 @@ def test_translation_matrix(self):
 
     def testPersistence(self):
         """Test storing/loading the entire model."""
+        tmpf = get_tmpfile('transmat-en-it.pkl')
+
         model = translation_matrix.TranslationMatrix(self.source_word_vec, self.target_word_vec, self.word_pairs)
         model.train(self.word_pairs)
-        model.save(temp_save_file())
+        model.save(tmpf)
 
-        loaded_model = translation_matrix.TranslationMatrix.load(temp_save_file())
+        loaded_model = translation_matrix.TranslationMatrix.load(tmpf)
         self.assertTrue(np.allclose(model.translation_matrix, loaded_model.translation_matrix))
 
     def test_translate_nn(self):
diff --git a/gensim/test/test_varembed_wrapper.py b/gensim/test/test_varembed_wrapper.py
index 2053f7ffc3..c94c2bbcdb 100644
--- a/gensim/test/test_varembed_wrapper.py
+++ b/gensim/test/test_varembed_wrapper.py
@@ -11,7 +11,6 @@
 """
 
 import logging
-import os
 import sys
 
 import numpy as np
@@ -19,15 +18,14 @@
 import unittest
 
 from gensim.models.wrappers import varembed
+from gensim.test.utils import datapath
 
 try:
     import morfessor  # noqa: F401
 except ImportError:
     raise unittest.SkipTest("Test requires Morfessor to be installed, which is not available")
 
-# needed because sample data files are located in the same folder
-module_path = os.path.dirname(__file__)
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
+
 varembed_model_vector_file = datapath('varembed_vectors.pkl')
 varembed_model_morfessor_file = datapath('varembed_morfessor.bin')
 
diff --git a/gensim/test/test_wikicorpus.py b/gensim/test/test_wikicorpus.py
index ede577a52b..e7b7b14011 100644
--- a/gensim/test/test_wikicorpus.py
+++ b/gensim/test/test_wikicorpus.py
@@ -9,15 +9,13 @@
 """
 
 
-import os
 import logging
 import unittest
 
 from gensim.corpora.wikicorpus import WikiCorpus
 from gensim import utils
+from gensim.test.utils import datapath
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
 FILENAME = 'enwiki-latest-pages-articles1.xml-p000000010p000030302-shortened.bz2'
 FILENAME_U = 'bgwiki-latest-pages-articles-shortened.xml.bz2'
 
diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py
index 4c642ce5d2..a3720237c3 100644
--- a/gensim/test/test_word2vec.py
+++ b/gensim/test/test_word2vec.py
@@ -12,7 +12,6 @@
 import logging
 import unittest
 import os
-import tempfile
 import bz2
 import sys
 
@@ -20,6 +19,7 @@
 
 from gensim import utils
 from gensim.models import word2vec, keyedvectors
+from gensim.test.utils import datapath, get_tmpfile, common_texts as sentences
 from testfixtures import log_capture
 
 try:
@@ -28,9 +28,6 @@
 except ImportError:
     PYEMD_EXT = False
 
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
 
 class LeeCorpus(object):
     def __iter__(self):
@@ -41,18 +38,6 @@ def __iter__(self):
 
 list_corpus = list(LeeCorpus())
 
-sentences = [
-    ['human', 'interface', 'computer'],
-    ['survey', 'user', 'computer', 'system', 'response', 'time'],
-    ['eps', 'user', 'interface', 'system'],
-    ['system', 'human', 'system', 'eps'],
-    ['user', 'response', 'time'],
-    ['trees'],
-    ['graph', 'trees'],
-    ['graph', 'minors', 'trees'],
-    ['graph', 'minors', 'survey']
-]
-
 new_sentences = [
     ['computer', 'artificial', 'intelligence'],
     ['artificial', 'trees'],
@@ -63,11 +48,6 @@ def __iter__(self):
 ]
 
 
-def testfile():
-    # temporary data will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_word2vec.tst')
-
-
 def _rule(word, count, min_count):
     if word == "human":
         return utils.RULE_DISCARD  # throw out
@@ -77,10 +57,11 @@ def _rule(word, count, min_count):
 
 def load_on_instance():
     # Save and load a Word2Vec Model on instance for test
+    tmpf = get_tmpfile('gensim_word2vec.tst')
     model = word2vec.Word2Vec(sentences, min_count=1)
-    model.save(testfile())
+    model.save(tmpf)
     model = word2vec.Word2Vec()  # should fail at this point
-    return model.load(testfile())
+    return model.load(tmpf)
 
 
 class TestWord2VecModel(unittest.TestCase):
@@ -148,9 +129,10 @@ def testOnlineLearning(self):
     def testOnlineLearningAfterSave(self):
         """Test that the algorithm is able to add new words to the
         vocabulary and to a trained model when using a sorted vocabulary"""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model_neg = word2vec.Word2Vec(sentences, size=10, min_count=0, seed=42, hs=0, negative=5)
-        model_neg.save(testfile())
-        model_neg = word2vec.Word2Vec.load(testfile())
+        model_neg.save(tmpf)
+        model_neg = word2vec.Word2Vec.load(tmpf)
         self.assertTrue(len(model_neg.wv.vocab), 12)
         model_neg.build_vocab(new_sentences, update=True)
         model_neg.train(new_sentences, total_examples=model_neg.corpus_count, epochs=model_neg.iter)
@@ -203,21 +185,23 @@ def test_cbow_neg_online(self):
 
     def testPersistence(self):
         """Test storing/loading the entire model."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
-        model.save(testfile())
-        self.models_equal(model, word2vec.Word2Vec.load(testfile()))
+        model.save(tmpf)
+        self.models_equal(model, word2vec.Word2Vec.load(tmpf))
         #  test persistence of the KeyedVectors of a model
         wv = model.wv
-        wv.save(testfile())
-        loaded_wv = keyedvectors.KeyedVectors.load(testfile())
+        wv.save(tmpf)
+        loaded_wv = keyedvectors.KeyedVectors.load(tmpf)
         self.assertTrue(np.allclose(wv.syn0, loaded_wv.syn0))
         self.assertEqual(len(wv.vocab), len(loaded_wv.vocab))
 
     def testPersistenceWithConstructorRule(self):
         """Test storing/loading the entire model with a vocab trimming rule passed in the constructor."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1, trim_rule=_rule)
-        model.save(testfile())
-        self.models_equal(model, word2vec.Word2Vec.load(testfile()))
+        model.save(tmpf)
+        self.models_equal(model, word2vec.Word2Vec.load(tmpf))
 
     def testRuleWithMinCount(self):
         """Test that returning RULE_DEFAULT from trim_rule triggers min_count."""
@@ -234,21 +218,24 @@ def testRule(self):
 
     def testLambdaRule(self):
         """Test that lambda trim_rule works."""
-        rule = lambda word, count, min_count: utils.RULE_DISCARD if word == "human" else utils.RULE_DEFAULT
+        def rule(word, count, min_count):
+            return utils.RULE_DISCARD if word == "human" else utils.RULE_DEFAULT
+
         model = word2vec.Word2Vec(sentences, min_count=1, trim_rule=rule)
         self.assertTrue("human" not in model.wv.vocab)
 
     def testSyn0NormNotSaved(self):
         """Test syn0norm isn't saved in model file"""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        model.save(testfile())
-        loaded_model = word2vec.Word2Vec.load(testfile())
+        model.save(tmpf)
+        loaded_model = word2vec.Word2Vec.load(tmpf)
         self.assertTrue(loaded_model.wv.syn0norm is None)
 
         wv = model.wv
-        wv.save(testfile())
-        loaded_kv = keyedvectors.KeyedVectors.load(testfile())
+        wv.save(tmpf)
+        loaded_kv = keyedvectors.KeyedVectors.load(tmpf)
         self.assertTrue(loaded_kv.syn0norm is None)
 
     def testLoadPreKeyedVectorModel(self):
@@ -280,34 +267,36 @@ def testLoadPreKeyedVectorModelCFormat(self):
 
     def testPersistenceWord2VecFormat(self):
         """Test storing/loading the entire model in word2vec format."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        model.wv.save_word2vec_format(testfile(), binary=True)
-        binary_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True)
+        model.wv.save_word2vec_format(tmpf, binary=True)
+        binary_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=True)
         binary_model_kv.init_sims(replace=False)
         self.assertTrue(np.allclose(model['human'], binary_model_kv['human']))
-        norm_only_model = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True)
+        norm_only_model = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=True)
         norm_only_model.init_sims(replace=True)
         self.assertFalse(np.allclose(model['human'], norm_only_model['human']))
         self.assertTrue(np.allclose(model.wv.syn0norm[model.wv.vocab['human'].index], norm_only_model['human']))
-        limited_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True, limit=3)
+        limited_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=True, limit=3)
         self.assertEqual(len(limited_model_kv.syn0), 3)
         half_precision_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(
-            testfile(), binary=True, datatype=np.float16
+            tmpf, binary=True, datatype=np.float16
         )
         self.assertEqual(binary_model_kv.syn0.nbytes, half_precision_model_kv.syn0.nbytes * 2)
 
     def testNoTrainingCFormat(self):
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        model.wv.save_word2vec_format(testfile(), binary=True)
-        kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True)
+        model.wv.save_word2vec_format(tmpf, binary=True)
+        kv = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=True)
         binary_model = word2vec.Word2Vec()
         binary_model.wv = kv
         self.assertRaises(ValueError, binary_model.train, sentences)
 
     def testTooShortBinaryWord2VecFormat(self):
-        tfile = testfile()
+        tfile = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
         model.wv.save_word2vec_format(tfile, binary=True)
@@ -317,7 +306,7 @@ def testTooShortBinaryWord2VecFormat(self):
         self.assertRaises(EOFError, keyedvectors.KeyedVectors.load_word2vec_format, tfile, binary=True)
 
     def testTooShortTextWord2VecFormat(self):
-        tfile = testfile()
+        tfile = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
         model.wv.save_word2vec_format(tfile, binary=False)
@@ -328,13 +317,14 @@ def testTooShortTextWord2VecFormat(self):
 
     def testPersistenceWord2VecFormatNonBinary(self):
         """Test storing/loading the entire model in word2vec non-binary format."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        model.wv.save_word2vec_format(testfile(), binary=False)
-        text_model = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=False)
+        model.wv.save_word2vec_format(tmpf, binary=False)
+        text_model = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=False)
         text_model.init_sims(False)
         self.assertTrue(np.allclose(model['human'], text_model['human'], atol=1e-6))
-        norm_only_model = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=False)
+        norm_only_model = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=False)
         norm_only_model.init_sims(True)
         self.assertFalse(np.allclose(model['human'], norm_only_model['human'], atol=1e-6))
         self.assertTrue(np.allclose(
@@ -343,44 +333,48 @@ def testPersistenceWord2VecFormatNonBinary(self):
 
     def testPersistenceWord2VecFormatWithVocab(self):
         """Test storing/loading the entire model and vocabulary in word2vec format."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        testvocab = os.path.join(tempfile.gettempdir(), 'gensim_word2vec.vocab')
-        model.wv.save_word2vec_format(testfile(), testvocab, binary=True)
-        binary_model_with_vocab_kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), testvocab, binary=True)
+        testvocab = get_tmpfile('gensim_word2vec.vocab')
+        model.wv.save_word2vec_format(tmpf, testvocab, binary=True)
+        binary_model_with_vocab_kv = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, testvocab, binary=True)
         self.assertEqual(model.wv.vocab['human'].count, binary_model_with_vocab_kv.vocab['human'].count)
 
     def testPersistenceKeyedVectorsFormatWithVocab(self):
         """Test storing/loading the entire model and vocabulary in word2vec format."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        testvocab = os.path.join(tempfile.gettempdir(), 'gensim_word2vec.vocab')
-        model.wv.save_word2vec_format(testfile(), testvocab, binary=True)
-        kv_binary_model_with_vocab = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), testvocab, binary=True)
+        testvocab = get_tmpfile('gensim_word2vec.vocab')
+        model.wv.save_word2vec_format(tmpf, testvocab, binary=True)
+        kv_binary_model_with_vocab = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, testvocab, binary=True)
         self.assertEqual(model.wv.vocab['human'].count, kv_binary_model_with_vocab.vocab['human'].count)
 
     def testPersistenceWord2VecFormatCombinationWithStandardPersistence(self):
         """Test storing/loading the entire model and vocabulary in word2vec format chained with
          saving and loading via `save` and `load` methods`.
          It was possible prior to 1.0.0 release, now raises Exception"""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
         model.init_sims()
-        testvocab = os.path.join(tempfile.gettempdir(), 'gensim_word2vec.vocab')
-        model.wv.save_word2vec_format(testfile(), testvocab, binary=True)
-        binary_model_with_vocab_kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), testvocab, binary=True)
-        binary_model_with_vocab_kv.save(testfile())
-        self.assertRaises(AttributeError, word2vec.Word2Vec.load, testfile())
+        testvocab = get_tmpfile('gensim_word2vec.vocab')
+        model.wv.save_word2vec_format(tmpf, testvocab, binary=True)
+        binary_model_with_vocab_kv = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, testvocab, binary=True)
+        binary_model_with_vocab_kv.save(tmpf)
+        self.assertRaises(AttributeError, word2vec.Word2Vec.load, tmpf)
 
     def testLargeMmap(self):
         """Test storing/loading the entire model."""
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
 
         # test storing the internal arrays into separate files
-        model.save(testfile(), sep_limit=0)
-        self.models_equal(model, word2vec.Word2Vec.load(testfile()))
+        model.save(tmpf, sep_limit=0)
+        self.models_equal(model, word2vec.Word2Vec.load(tmpf))
 
         # make sure mmaping the arrays back works, too
-        self.models_equal(model, word2vec.Word2Vec.load(testfile(), mmap='r'))
+        self.models_equal(model, word2vec.Word2Vec.load(tmpf, mmap='r'))
 
     def testVocab(self):
         """Test word2vec vocabulary building."""
@@ -672,9 +666,10 @@ def testDeleteTemporaryTrainingData(self):
                 self.assertTrue(not hasattr(model, 'syn0_lockf'))
 
     def testNormalizeAfterTrainingData(self):
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model = word2vec.Word2Vec(sentences, min_count=1)
-        model.save(testfile())
-        norm_only_model = word2vec.Word2Vec.load(testfile())
+        model.save(tmpf)
+        norm_only_model = word2vec.Word2Vec.load(tmpf)
         norm_only_model.delete_temporary_training_data(replace_word_vectors_with_normalized=True)
         self.assertFalse(np.allclose(model['human'], norm_only_model['human']))
 
@@ -690,9 +685,10 @@ def testPredictOutputWord(self):
         self.assertEqual(predictions_out_of_vocab, None)
 
         # when required model parameters have been deleted
+        tmpf = get_tmpfile('gensim_word2vec.tst')
         model_with_neg.init_sims()
-        model_with_neg.wv.save_word2vec_format(testfile(), binary=True)
-        kv_model_with_neg = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True)
+        model_with_neg.wv.save_word2vec_format(tmpf, binary=True)
+        kv_model_with_neg = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, binary=True)
         binary_model_with_neg = word2vec.Word2Vec()
         binary_model_with_neg.wv = kv_model_with_neg
         self.assertRaises(RuntimeError, binary_model_with_neg.predict_output_word, ['system', 'human'])
diff --git a/gensim/test/test_wordrank_wrapper.py b/gensim/test/test_wordrank_wrapper.py
index 4ecb9f7c70..634afa71cc 100644
--- a/gensim/test/test_wordrank_wrapper.py
+++ b/gensim/test/test_wordrank_wrapper.py
@@ -12,19 +12,11 @@
 import logging
 import unittest
 import os
-import tempfile
 
 import numpy
 
 from gensim.models.wrappers import wordrank
-
-module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
-datapath = lambda fname: os.path.join(module_path, 'test_data', fname)
-
-
-def testfile():
-    # temporary model will be stored to this file
-    return os.path.join(tempfile.gettempdir(), 'gensim_wordrank.test')
+from gensim.test.utils import datapath, get_tmpfile
 
 
 class TestWordrank(unittest.TestCase):
@@ -61,8 +53,9 @@ def testPersistence(self):
         """Test storing/loading the entire model"""
         if not self.wr_path:
             return
-        self.test_model.save(testfile())
-        loaded = wordrank.Wordrank.load(testfile())
+        tmpf = get_tmpfile('gensim_wordrank.test')
+        self.test_model.save(tmpf)
+        loaded = wordrank.Wordrank.load(tmpf)
         self.models_equal(self.test_model, loaded)
 
     def testSimilarity(self):
diff --git a/gensim/test/utils.py b/gensim/test/utils.py
new file mode 100644
index 0000000000..89fae9226e
--- /dev/null
+++ b/gensim/test/utils.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+Common utils for tests
+"""
+import tempfile
+import os
+
+from gensim.corpora import Dictionary
+
+module_path = os.path.dirname(__file__)  # needed because sample data files are located in the same folder
+
+
+def datapath(fname):
+    """Return the full path to the pre-created test data file `fname` (typically a corpus) in the `test_data` folder."""
+    return os.path.join(module_path, 'test_data', fname)
+
+
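+def get_tmpfile(suffix):
+    """
+    Return the full path to a file named `suffix` inside the system temporary directory.
+
+    Function doesn't create the file. The path is `tempfile.gettempdir()` joined with `suffix`, so repeated calls with the same suffix currently yield the same path (callers should not depend on this).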
+    """
+    return os.path.join(tempfile.gettempdir(), suffix)
+
+
+# set up vars used in testing ("Deerwester" from the web tutorial)
+common_texts = [
+    ['human', 'interface', 'computer'],
+    ['survey', 'user', 'computer', 'system', 'response', 'time'],
+    ['eps', 'user', 'interface', 'system'],
+    ['system', 'human', 'system', 'eps'],
+    ['user', 'response', 'time'],
+    ['trees'],
+    ['graph', 'trees'],
+    ['graph', 'minors', 'trees'],
+    ['graph', 'minors', 'survey']
+]
+
+common_dictionary = Dictionary(common_texts)
+common_corpus = [common_dictionary.doc2bow(text) for text in common_texts]
diff --git a/gensim/utils.py b/gensim/utils.py
index bca29e73cc..0627f4703c 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -288,10 +288,11 @@ def _load_specials(self, fname, mmap, compress, subname):
         opportunity to recursively included SaveLoad instances.
 
         """
-        mmap_error = lambda x, y: IOError(
-            'Cannot mmap compressed object %s in file %s. ' % (x, y) +
-            'Use `load(fname, mmap=None)` or uncompress files manually.'
-        )
+        def mmap_error(obj, filename):
+            return IOError(
+                'Cannot mmap compressed object %s in file %s. ' % (obj, filename) +
+                'Use `load(fname, mmap=None)` or uncompress files manually.'
+            )
 
         for attrib in getattr(self, '__recursive_saveloads', []):
             cfname = '.'.join((fname, attrib))
@@ -336,13 +337,8 @@ def _load_specials(self, fname, mmap, compress, subname):
     @staticmethod
     def _adapt_by_suffix(fname):
         """Give appropriate compress setting and filename formula"""
-        if fname.endswith('.gz') or fname.endswith('.bz2'):
-            compress = True
-            subname = lambda *args: '.'.join(list(args) + ['npz'])
-        else:
-            compress = False
-            subname = lambda *args: '.'.join(list(args) + ['npy'])
-        return compress, subname
+        compress, suffix = (True, 'npz') if fname.endswith('.gz') or fname.endswith('.bz2') else (False, 'npy')
+        return compress, lambda *args: '.'.join(args + (suffix,))
 
     def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2):
         """
diff --git a/setup.cfg b/setup.cfg
index a5d10487ef..26a4aa0132 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -8,4 +8,4 @@ artifact_indexes=
     http://17a25141cb7f75c18ee4-676a79255544e7711e0dd8bccdcdd1cb.r23.cf2.rackcdn.com
 
 [flake8]
-ignore = E501,E731,E12,W503,E402
+ignore = E501,E12,W503