diff --git a/ez_setup.py b/ez_setup.py index 4da59fcd76..4251063fc0 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -32,19 +32,25 @@ DEFAULT_VERSION = "1.3.2" DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" + def _python_cmd(*args): args = (sys.executable,) + args return subprocess.call(args) == 0 + def _check_call_py24(cmd, *args, **kwargs): res = subprocess.call(cmd, *args, **kwargs) + class CalledProcessError(Exception): pass if not res == 0: msg = "Command '%s' return non-zero exit status %d" % (cmd, res) raise CalledProcessError(msg) + + vars(subprocess).setdefault('check_call', _check_call_py24) + def _install(tarball, install_args=()): # extracting the tarball tmpdir = tempfile.mkdtemp() @@ -151,6 +157,7 @@ def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, return _do_download(version, download_base, to_dir, download_delay) + def _clean_check(cmd, target): """ Run the command to download target. If the command fails, clean up before @@ -163,6 +170,7 @@ def _clean_check(cmd, target): os.unlink(target) raise + def download_file_powershell(url, target): """ Download the file at url to target using Powershell (which will validate @@ -176,6 +184,7 @@ def download_file_powershell(url, target): ] _clean_check(cmd, target) + def has_powershell(): if platform.system() != 'Windows': return False @@ -184,50 +193,58 @@ def has_powershell(): try: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except: + except Exception: return False finally: devnull.close() return True + download_file_powershell.viable = has_powershell + def download_file_curl(url, target): cmd = ['curl', url, '--silent', '--output', target] _clean_check(cmd, target) + def has_curl(): cmd = ['curl', '--version'] devnull = open(os.path.devnull, 'wb') try: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except: + except Exception: return False finally: devnull.close() return True + download_file_curl.viable = has_curl + def download_file_wget(url, target): cmd = ['wget', url, '--quiet', '--output-document', target] _clean_check(cmd, target) + def has_wget(): cmd = ['wget', '--version'] devnull = open(os.path.devnull, 'wb') try: try: subprocess.check_call(cmd, stdout=devnull, stderr=devnull) - except: + except Exception: return False finally: devnull.close() return True + download_file_wget.viable = has_wget + def download_file_insecure(url, target): """ Use Python to download the file, even though it cannot authenticate the @@ -251,8 +268,10 @@ def download_file_insecure(url, target): if dst: dst.close() + download_file_insecure.viable = lambda: True + def get_best_downloader(): downloaders = [ download_file_powershell, @@ -265,6 +284,7 @@ def get_best_downloader(): if dl.viable(): return dl + def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, delay=15, downloader_factory=get_best_downloader): @@ -317,7 +337,7 @@ def _extractall(self, path=".", members=None): # Reverse sort directories. 
if sys.version_info < (2, 4): def sorter(dir1, dir2): - return cmp(dir1.name, dir2.name) + return cmp(dir1.name, dir2.name) # noqa:F821 directories.sort(sorter) directories.reverse() else: @@ -350,6 +370,7 @@ def _build_install_args(options): install_args.append('--user') return install_args + def _parse_args(): """ Parse the command line for options @@ -371,6 +392,7 @@ def _parse_args(): # positional arguments are ignored return options + def main(version=DEFAULT_VERSION): """Install or upgrade setuptools and EasyInstall""" options = _parse_args() @@ -378,5 +400,6 @@ def main(version=DEFAULT_VERSION): downloader_factory=options.downloader_factory) return _install(tarball, _build_install_args(options)) + if __name__ == '__main__': sys.exit(main()) diff --git a/gensim/__init__.py b/gensim/__init__.py index aa95da4a6e..c267afe4de 100644 --- a/gensim/__init__.py +++ b/gensim/__init__.py @@ -3,16 +3,19 @@ similarities within a corpus of documents. """ -from gensim import parsing, matutils, interfaces, corpora, models, similarities, summarization +from gensim import parsing, matutils, interfaces, corpora, models, similarities, summarization # noqa:F401 import logging __version__ = '2.3.0' + class NullHandler(logging.Handler): """For python versions <= 2.6; same as `logging.NullHandler` in 2.7.""" + def emit(self, record): pass + logger = logging.getLogger('gensim') -if len(logger.handlers) == 0: # To ensure reload() doesn't add another one +if len(logger.handlers) == 0: # To ensure reload() doesn't add another one logger.addHandler(NullHandler()) diff --git a/gensim/corpora/__init__.py b/gensim/corpora/__init__.py index a11a0df229..a5c54a65ff 100644 --- a/gensim/corpora/__init__.py +++ b/gensim/corpora/__init__.py @@ -3,15 +3,15 @@ """ # bring corpus classes directly into package namespace, to save some typing -from .indexedcorpus import IndexedCorpus # must appear before the other classes +from .indexedcorpus import IndexedCorpus # noqa:F401 must appear before the other classes -from .mmcorpus import MmCorpus -from .bleicorpus import BleiCorpus -from .svmlightcorpus import SvmLightCorpus -from .lowcorpus import LowCorpus -from .dictionary import Dictionary -from .hashdictionary import HashDictionary -from .wikicorpus import WikiCorpus -from .textcorpus import TextCorpus -from .ucicorpus import UciCorpus -from .malletcorpus import MalletCorpus +from .mmcorpus import MmCorpus # noqa:F401 +from .bleicorpus import BleiCorpus # noqa:F401 +from .svmlightcorpus import SvmLightCorpus # noqa:F401 +from .lowcorpus import LowCorpus # noqa:F401 +from .dictionary import Dictionary # noqa:F401 +from .hashdictionary import HashDictionary # noqa:F401 +from .wikicorpus import WikiCorpus # noqa:F401 +from .textcorpus import TextCorpus # noqa:F401 +from .ucicorpus import UciCorpus # noqa:F401 +from .malletcorpus import MalletCorpus # noqa:F401 diff --git a/gensim/corpora/bleicorpus.py b/gensim/corpora/bleicorpus.py index b84d080c40..327a36fc14 100644 --- a/gensim/corpora/bleicorpus.py +++ b/gensim/corpora/bleicorpus.py @@ -14,7 +14,7 @@ from os import path import logging -from gensim import interfaces, utils +from gensim import utils from gensim.corpora import IndexedCorpus from six.moves import xrange diff --git a/gensim/corpora/dictionary.py b/gensim/corpora/dictionary.py index a0b3f8d73e..d32276688b 100644 --- a/gensim/corpora/dictionary.py +++ b/gensim/corpora/dictionary.py @@ -333,14 +333,14 @@ def merge_with(self, other): old2new[other_id] = new_id try: self.dfs[new_id] += other.dfs[other_id] - except: + 
except Exception: # `other` isn't a Dictionary (probably just a dict) => ignore dfs, keep going pass try: self.num_docs += other.num_docs self.num_nnz += other.num_nnz self.num_pos += other.num_pos - except: + except Exception: pass import gensim.models diff --git a/gensim/corpora/hashdictionary.py b/gensim/corpora/hashdictionary.py index 0b15de5df6..63f966b3cd 100644 --- a/gensim/corpora/hashdictionary.py +++ b/gensim/corpora/hashdictionary.py @@ -118,7 +118,7 @@ def add_documents(self, documents): for docno, document in enumerate(documents): if docno % 10000 == 0: logger.info("adding document #%i to %s" % (docno, self)) - _ = self.doc2bow(document, allow_update=True) # ignore the result, here we only care about updating token ids + self.doc2bow(document, allow_update=True) # ignore the result, here we only care about updating token ids logger.info( "built %s from %i documents (total %i corpus positions)", self, self.num_docs, self.num_pos) diff --git a/gensim/corpora/indexedcorpus.py b/gensim/corpora/indexedcorpus.py index e0e00e7663..62f29b25ed 100644 --- a/gensim/corpora/indexedcorpus.py +++ b/gensim/corpora/indexedcorpus.py @@ -51,7 +51,7 @@ def __init__(self, fname, index_fname=None): # change self.index into a numpy.ndarray to support fancy indexing self.index = numpy.asarray(self.index) logger.info("loaded corpus index from %s" % index_fname) - except: + except Exception: self.index = None self.length = None @@ -130,5 +130,4 @@ def __getitem__(self, docno): raise ValueError('Unrecognised value for docno, use either a single integer, a slice or a numpy.ndarray') - # endclass IndexedCorpus diff --git a/gensim/corpora/lowcorpus.py b/gensim/corpora/lowcorpus.py index b87f1108a2..315490cdcc 100644 --- a/gensim/corpora/lowcorpus.py +++ b/gensim/corpora/lowcorpus.py @@ -65,24 +65,24 @@ def __init__(self, fname, id2word=None, line2words=split_on_space): IndexedCorpus.__init__(self, fname) logger.info("loading corpus from %s" % fname) - self.fname = fname # input file, see class doc for format - self.line2words = line2words # how to translate lines into words (simply split on space by default) + self.fname = fname # input file, see class doc for format + self.line2words = line2words # how to translate lines into words (simply split on space by default) self.num_docs = self._calculate_num_docs() if not id2word: # build a list of all word types in the corpus (distinct words) logger.info("extracting vocabulary from the corpus") all_terms = set() - self.use_wordids = False # return documents as (word, wordCount) 2-tuples + self.use_wordids = False # return documents as (word, wordCount) 2-tuples for doc in self: all_terms.update(word for word, wordCnt in doc) - all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id - self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string) + all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id + self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string) else: logger.info("using provided word mapping (%i ids)" % len(id2word)) self.id2word = id2word self.num_terms = len(self.word2id) - self.use_wordids = True # return documents as (wordIndex, wordCount) 2-tuples + self.use_wordids = True # return documents as (wordIndex, wordCount) 2-tuples logger.info("loaded corpus with %i documents and %i terms from %s" % (self.num_docs, self.num_terms, fname)) @@ -135,7 +135,7 @@ def 
__iter__(self): """ with utils.smart_open(self.fname) as fin: for lineno, line in enumerate(fin): - if lineno > 0: # ignore the first line = number of documents + if lineno > 0: # ignore the first line = number of documents yield self.line2doc(line) @staticmethod diff --git a/gensim/corpora/malletcorpus.py b/gensim/corpora/malletcorpus.py index f8410845e6..00333e9358 100644 --- a/gensim/corpora/malletcorpus.py +++ b/gensim/corpora/malletcorpus.py @@ -56,8 +56,8 @@ def __iter__(self): yield self.line2doc(line) def line2doc(self, line): - l = [word for word in utils.to_unicode(line).strip().split(' ') if word] - docid, doclang, words = l[0], l[1], l[2:] + splited_line = [word for word in utils.to_unicode(line).strip().split(' ') if word] + docid, doclang, words = splited_line[0], splited_line[1], splited_line[2:] doc = super(MalletCorpus, self).line2doc(' '.join(words)) diff --git a/gensim/corpora/mmcorpus.py b/gensim/corpora/mmcorpus.py index a9a879db3e..08e809443b 100644 --- a/gensim/corpora/mmcorpus.py +++ b/gensim/corpora/mmcorpus.py @@ -12,7 +12,7 @@ import logging -from gensim import interfaces, matutils +from gensim import matutils from gensim.corpora import IndexedCorpus @@ -23,6 +23,7 @@ class MmCorpus(matutils.MmReader, IndexedCorpus): """ Corpus in the Matrix Market format. """ + def __init__(self, fname): # avoid calling super(), too confusing IndexedCorpus.__init__(self, fname) diff --git a/gensim/corpora/sharded_corpus.py b/gensim/corpora/sharded_corpus.py index 16ecaf3d12..255fc2b7fe 100644 --- a/gensim/corpora/sharded_corpus.py +++ b/gensim/corpora/sharded_corpus.py @@ -26,6 +26,12 @@ import scipy.sparse as sparse import time +from six.moves import xrange + +import gensim +from gensim.corpora import IndexedCorpus +from gensim.interfaces import TransformedCorpus + logger = logging.getLogger(__name__) #: Specifies which dtype should be used for serializing the shards. @@ -37,13 +43,6 @@ logger.info('Could not import Theano, will use standard float for default ShardedCorpus dtype.') -from six.moves import xrange - -import gensim -from gensim.corpora import IndexedCorpus -from gensim.interfaces import TransformedCorpus - - class ShardedCorpus(IndexedCorpus): """ This corpus is designed for situations where you need to train a model @@ -233,7 +232,7 @@ def __init__(self, output_prefix, corpus, dim=None, self.current_shard = None # The current shard itself (numpy ndarray) self.current_shard_n = None # Current shard is the current_shard_n-th self.current_offset = None # The index into the dataset which - # corresponds to index 0 of current shard + # corresponds to index 0 of current shard logger.info('Initializing sharded corpus with prefix ' '{0}'.format(output_prefix)) @@ -331,7 +330,7 @@ def save_shard(self, shard, n=None, filename=None): """ new_shard = False if n is None: - n = self.n_shards # Saving the *next* one by default. + n = self.n_shards # Saving the *next* one by default. new_shard = True if not filename: @@ -347,7 +346,7 @@ def load_shard(self, n): """ Load (unpickle) the n-th shard as the "live" part of the dataset into the Dataset object.""" - #logger.debug('ShardedCorpus loading shard {0}, ' + # logger.debug('ShardedCorpus loading shard {0}, ' # 'current shard: {1}'.format(n, self.current_shard_n)) # No-op if the shard is already open. @@ -416,7 +415,7 @@ def in_next(self, offset): """ if self.current_shard_n == self.n_shards: - return False # There's no next shard. + return False # There's no next shard. 
return (self.offsets[self.current_shard_n + 1] <= offset) \ and (offset < self.offsets[self.current_shard_n + 2]) @@ -611,7 +610,7 @@ def __getitem__(self, offset): # This fails on one-past # slice indexing; that's why there's a code branch here. - #logger.debug('ShardedCorpus: Retrieving slice {0}: ' + # logger.debug('ShardedCorpus: Retrieving slice {0}: ' # 'shard {1}'.format((offset.start, offset.stop), # (first_shard, last_shard))) @@ -656,13 +655,13 @@ def __getitem__(self, offset): shard_stop = self.offsets[self.current_shard_n + 1] - \ self.current_offset - #s_result[result_start:result_stop] = self.current_shard[ + # s_result[result_start:result_stop] = self.current_shard[ # shard_start:shard_stop] s_result = self.__add_to_slice(s_result, result_start, result_stop, shard_start, shard_stop) # First and last get special treatment, these are in between - for shard_n in xrange(first_shard+1, last_shard): + for shard_n in xrange(first_shard + 1, last_shard): self.load_shard(shard_n) result_start = result_stop @@ -753,7 +752,7 @@ def _getitem_sparse2gensim(self, result): """ def row_sparse2gensim(row_idx, csr_matrix): - indices = csr_matrix.indices[csr_matrix.indptr[row_idx]:csr_matrix.indptr[row_idx+1]] + indices = csr_matrix.indices[csr_matrix.indptr[row_idx]:csr_matrix.indptr[row_idx + 1]] g_row = [(col_idx, csr_matrix[row_idx, col_idx]) for col_idx in indices] return g_row diff --git a/gensim/corpora/svmlightcorpus.py b/gensim/corpora/svmlightcorpus.py index 4fdc764b16..5e24419421 100644 --- a/gensim/corpora/svmlightcorpus.py +++ b/gensim/corpora/svmlightcorpus.py @@ -58,7 +58,7 @@ def __init__(self, fname, store_labels=True): IndexedCorpus.__init__(self, fname) logger.info("loading corpus from %s" % fname) - self.fname = fname # input file, see class doc for format + self.fname = fname # input file, see class doc for format self.length = None self.store_labels = store_labels self.labels = [] @@ -94,7 +94,7 @@ def save_corpus(fname, corpus, id2word=None, labels=False, metadata=False): offsets = [] with utils.smart_open(fname, 'wb') as fout: for docno, doc in enumerate(corpus): - label = labels[docno] if labels else 0 # target class is 0 by default + label = labels[docno] if labels else 0 # target class is 0 by default offsets.append(fout.tell()) fout.write(utils.to_utf8(SvmLightCorpus.doc2line(doc, label))) return offsets @@ -114,12 +114,12 @@ def line2doc(self, line): line = utils.to_unicode(line) line = line[: line.find('#')].strip() if not line: - return None # ignore comments and empty lines + return None # ignore comments and empty lines parts = line.split() if not parts: raise ValueError('invalid line format in %s' % self.fname) target, fields = parts[0], [part.rsplit(':', 1) for part in parts[1:]] - doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid'] # ignore 'qid' features, convert 1-based feature ids to 0-based + doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid'] # ignore 'qid' features, convert 1-based feature ids to 0-based return doc, target @staticmethod @@ -127,7 +127,7 @@ def doc2line(doc, label=0): """ Output the document in SVMlight format, as a string. Inverse function to `line2doc`. 
""" - pairs = ' '.join("%i:%s" % (termid + 1, termval) for termid, termval in doc) # +1 to convert 0-base to 1-base + pairs = ' '.join("%i:%s" % (termid + 1, termval) for termid, termval in doc) # +1 to convert 0-base to 1-base return "%s %s\n" % (label, pairs) # endclass SvmLightCorpus diff --git a/gensim/corpora/ucicorpus.py b/gensim/corpora/ucicorpus.py index 44b2a772d9..0c09cc7e34 100644 --- a/gensim/corpora/ucicorpus.py +++ b/gensim/corpora/ucicorpus.py @@ -21,7 +21,7 @@ from gensim.corpora import IndexedCorpus from gensim.matutils import MmReader from gensim.matutils import MmWriter -from six import iteritems, string_types +from six import iteritems from six.moves import xrange @@ -118,7 +118,7 @@ def write_corpus(fname, corpus, progress_cnt=1000, index=False): offsets.append(posnow) poslast = posnow - vector = [(x, int(y)) for (x, y) in bow if int(y) != 0] # integer count, not floating weights + vector = [(x, int(y)) for (x, y) in bow if int(y) != 0] # integer count, not floating weights max_id, veclen = writer.write_vector(docno, vector) num_terms = max(num_terms, 1 + max_id) num_nnz += veclen @@ -165,7 +165,7 @@ def __iter__(self): (yielding one document at a time). """ for docId, doc in super(UciCorpus, self).__iter__(): - yield doc # get rid of docId, return the sparse vector only + yield doc # get rid of docId, return the sparse vector only def create_dictionary(self): """ diff --git a/gensim/corpora/wikicorpus.py b/gensim/corpora/wikicorpus.py index ec032067f1..ea87cce4a2 100755 --- a/gensim/corpora/wikicorpus.py +++ b/gensim/corpora/wikicorpus.py @@ -187,6 +187,8 @@ def get_namespace(tag): raise ValueError("%s not recognized as MediaWiki dump namespace" % namespace) return namespace + + _get_namespace = get_namespace @@ -233,6 +235,8 @@ def extract_pages(f, filter_namespaces=False): # ./revision/text element. The pages comprise the bulk of the # file, so in practice we prune away enough. elem.clear() + + _extract_pages = extract_pages # for backward compatibility @@ -266,6 +270,7 @@ class WikiCorpus(TextCorpus): >>> MmCorpus.serialize('wiki_en_vocab200k.mm', wiki) # another 8h, creates a file in MatrixMarket format plus file with id->word """ + def __init__(self, fname, processes=None, lemmatize=utils.has_pattern(), dictionary=None, filter_namespaces=('0',)): """ diff --git a/gensim/examples/dmlcz/__init__.py b/gensim/examples/dmlcz/__init__.py index 8b13789179..e69de29bb2 100644 --- a/gensim/examples/dmlcz/__init__.py +++ b/gensim/examples/dmlcz/__init__.py @@ -1 +0,0 @@ - diff --git a/gensim/examples/dmlcz/dmlcorpus.py b/gensim/examples/dmlcz/dmlcorpus.py index 63c9f16855..d8fb8c4cb5 100644 --- a/gensim/examples/dmlcz/dmlcorpus.py +++ b/gensim/examples/dmlcz/dmlcorpus.py @@ -11,11 +11,10 @@ import logging -import itertools import os.path from gensim import interfaces, matutils -import dictionary # for constructing word->id mappings +import dictionary # for constructing word->id mappings logger = logging.getLogger('gensim.corpora.dmlcorpus') @@ -34,39 +33,35 @@ class DmlConfig(object): output files and which articles to accept for the corpus (= an additional filter over the sources). 
""" - def __init__(self, configId, resultDir, acceptLangs = None): - self.resultDir = resultDir # output files will be stored in this directory - self.configId = configId # configId is a string that is used as filename prefix for all files, so keep it simple - self.sources = {} # all article sources; see sources.DmlSource class for an example of source - if acceptLangs is None: # which languages to accept - acceptLangs = set(['any']) # if not specified, accept all languages (including unknown/unspecified) + def __init__(self, configId, resultDir, acceptLangs=None): + self.resultDir = resultDir # output files will be stored in this directory + self.configId = configId # configId is a string that is used as filename prefix for all files, so keep it simple + self.sources = {} # all article sources; see sources.DmlSource class for an example of source + + if acceptLangs is None: # which languages to accept + acceptLangs = set(['any']) # if not specified, accept all languages (including unknown/unspecified) self.acceptLangs = set(acceptLangs) logger.info('initialized %s' % self) - def resultFile(self, fname): return os.path.join(self.resultDir, self.configId + '_' + fname) - def acceptArticle(self, metadata): - lang = metadata.get('language', 'unk') # if there was no language field in the article metadata, set language to 'unk' = unknown + lang = metadata.get('language', 'unk') # if there was no language field in the article metadata, set language to 'unk' = unknown if 'any' not in self.acceptLangs and lang not in self.acceptLangs: return False return True - def addSource(self, source): sourceId = str(source) assert sourceId not in self.sources, "source %s already present in the config!" % sourceId self.sources[sourceId] = source - def __str__(self): return ("DmlConfig(id=%s, sources=[%s], acceptLangs=[%s])" % (self.configId, ', '.join(self.sources.iterkeys()), ', '.join(self.acceptLangs))) -#endclass DmlConfig - +# endclass DmlConfig class DmlCorpus(interfaces.CorpusABC): @@ -79,16 +74,15 @@ class DmlCorpus(interfaces.CorpusABC): DmlCorpus has methods for building a dictionary (mapping between words and their ids). """ + def __init__(self): self.documents = [] self.config = None self.dictionary = dictionary.Dictionary() - def __len__(self): return len(self.documents) - def __iter__(self): """ The function that defines a corpus -- iterating over the corpus yields @@ -101,8 +95,7 @@ def __iter__(self): contents = source.getContent(docUri) words = [source.normalizeWord(word) for word in source.tokenize(contents)] - yield self.dictionary.doc2bow(words, allowUpdate = False) - + yield self.dictionary.doc2bow(words, allowUpdate=False) def buildDictionary(self): """ @@ -125,12 +118,11 @@ def buildDictionary(self): numPositions += len(words) # convert to bag-of-words, but ignore the result -- here we only care about updating token ids - _ = self.dictionary.doc2bow(words, allowUpdate = True) + _ = self.dictionary.doc2bow(words, allowUpdate=True) # noqa:F841 logger.info("built %s from %i documents (total %i corpus positions)" % (self.dictionary, len(self.documents), numPositions)) - - def processConfig(self, config, shuffle = False): + def processConfig(self, config, shuffle=False): """ Parse the directories specified in the config, looking for suitable articles. 
@@ -148,8 +140,8 @@ def processConfig(self, config, shuffle = False): logger.info("processing source '%s'" % sourceId) accepted = [] for articleUri in source.findArticles(): - meta = source.getMeta(articleUri) # retrieve metadata (= dictionary of key->value) - if config.acceptArticle(meta): # do additional filtering on articles, based on the article's metadata + meta = source.getMeta(articleUri) # retrieve metadata (= dictionary of key->value) + if config.acceptArticle(meta): # do additional filtering on articles, based on the article's metadata accepted.append((sourceId, articleUri)) logger.info("accepted %i articles for source '%s'" % (len(accepted), sourceId)) @@ -166,7 +158,6 @@ def processConfig(self, config, shuffle = False): logger.info("accepted total of %i articles for %s" % (len(self.documents), str(config))) - def saveDictionary(self, fname): logger.info("saving dictionary mapping to %s" % fname) fout = open(fname, 'w') @@ -194,7 +185,6 @@ def saveDocuments(self, fname): fout.write("%i\t%s\n" % (docNo, repr(docId))) fout.close() - def saveAsText(self): """ Store the corpus to disk, in a human-readable text format. @@ -211,7 +201,6 @@ def saveAsText(self): self.saveDocuments(self.config.resultFile('docids.txt')) matutils.MmWriter.writeCorpus(self.config.resultFile('bow.mm'), self) - def articleDir(self, docNo): """ Return absolute normalized path on filesystem to article no. `docNo`. @@ -220,7 +209,6 @@ def articleDir(self, docNo): source = self.config.sources[sourceId] return os.path.join(source.baseDir, outPath) - def getMeta(self, docNo): """ Return metadata for article no. `docNo`. @@ -228,5 +216,4 @@ def getMeta(self, docNo): sourceId, uri = self.documents[docNo] source = self.config.sources[sourceId] return source.getMeta(uri) -#endclass DmlCorpus - +# endclass DmlCorpus diff --git a/gensim/examples/dmlcz/gensim_build.py b/gensim/examples/dmlcz/gensim_build.py index 4e258ada8d..9695241fb3 100755 --- a/gensim/examples/dmlcz/gensim_build.py +++ b/gensim/examples/dmlcz/gensim_build.py @@ -15,12 +15,9 @@ import logging import sys import os.path -import re - from gensim.corpora import sources, dmlcorpus - PREFIX = 'dmlcz' AT_HOME = False @@ -51,12 +48,12 @@ def buildDmlCorpus(config): dml = dmlcorpus.DmlCorpus() - dml.processConfig(config, shuffle = True) + dml.processConfig(config, shuffle=True) dml.buildDictionary() - dml.dictionary.filterExtremes(noBelow=5, noAbove=0.3) # ignore too (in)frequent words + dml.dictionary.filterExtremes(noBelow=5, noAbove=0.3) # ignore too (in)frequent words - dml.save(config.resultFile('.pkl')) # save the mappings as binary data (actual documents are not saved, only their URIs) - dml.saveAsText() # save id mappings and documents as text data (matrix market format) + dml.save(config.resultFile('.pkl')) # save the mappings as binary data (actual documents are not saved, only their URIs) + dml.saveAsText() # save id mappings and documents as text data (matrix market format) return dml diff --git a/gensim/examples/dmlcz/gensim_genmodel.py b/gensim/examples/dmlcz/gensim_genmodel.py index 428f8b5536..df11f9696c 100755 --- a/gensim/examples/dmlcz/gensim_genmodel.py +++ b/gensim/examples/dmlcz/gensim_genmodel.py @@ -15,25 +15,22 @@ import logging import sys import os.path -import re - -from gensim.corpora import sources, dmlcorpus, MmCorpus +from gensim.corpora import dmlcorpus, MmCorpus from gensim.models import lsimodel, ldamodel, tfidfmodel, rpmodel import gensim_build # internal method parameters -DIM_RP = 300 # dimensionality for random projections 
-DIM_LSI = 200 # for lantent semantic indexing -DIM_LDA = 100 # for latent dirichlet allocation - +DIM_RP = 300 # dimensionality for random projections +DIM_LSI = 200 # for lantent semantic indexing +DIM_LDA = 100 # for latent dirichlet allocation if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') - logging.root.setLevel(level = logging.INFO) + logging.root.setLevel(level=logging.INFO) logging.info("running %s" % ' '.join(sys.argv)) program = os.path.basename(sys.argv[0]) @@ -56,22 +53,22 @@ corpus = MmCorpus(config.resultFile('bow.mm')) if method == 'tfidf': - model = tfidfmodel.TfidfModel(corpus, id2word = id2word, normalize = True) + model = tfidfmodel.TfidfModel(corpus, id2word=id2word, normalize=True) model.save(config.resultFile('model_tfidf.pkl')) elif method == 'lda': - model = ldamodel.LdaModel(corpus, id2word = id2word, numTopics = DIM_LDA) + model = ldamodel.LdaModel(corpus, id2word=id2word, numTopics=DIM_LDA) model.save(config.resultFile('model_lda.pkl')) elif method == 'lsi': # first, transform word counts to tf-idf weights - tfidf = tfidfmodel.TfidfModel(corpus, id2word = id2word, normalize = True) + tfidf = tfidfmodel.TfidfModel(corpus, id2word=id2word, normalize=True) # then find the transformation from tf-idf to latent space - model = lsimodel.LsiModel(tfidf[corpus], id2word = id2word, numTopics = DIM_LSI) + model = lsimodel.LsiModel(tfidf[corpus], id2word=id2word, numTopics=DIM_LSI) model.save(config.resultFile('model_lsi.pkl')) elif method == 'rp': # first, transform word counts to tf-idf weights - tfidf = tfidfmodel.TfidfModel(corpus, id2word = id2word, normalize = True) + tfidf = tfidfmodel.TfidfModel(corpus, id2word=id2word, normalize=True) # then find the transformation from tf-idf to latent space - model = rpmodel.RpModel(tfidf[corpus], id2word = id2word, numTopics = DIM_RP) + model = rpmodel.RpModel(tfidf[corpus], id2word=id2word, numTopics=DIM_RP) model.save(config.resultFile('model_rp.pkl')) else: raise ValueError('unknown topic extraction method: %s' % repr(method)) @@ -79,4 +76,3 @@ MmCorpus.saveCorpus(config.resultFile('%s.mm' % method), model[corpus]) logging.info("finished running %s" % program) - diff --git a/gensim/examples/dmlcz/gensim_xml.py b/gensim/examples/dmlcz/gensim_xml.py index 8ac2b265c2..f810d045d4 100755 --- a/gensim/examples/dmlcz/gensim_xml.py +++ b/gensim/examples/dmlcz/gensim_xml.py @@ -14,10 +14,8 @@ import logging import sys import os.path -import re - -from gensim.corpora import sources, dmlcorpus, MmCorpus +from gensim.corpora import dmlcorpus, MmCorpus from gensim.similarities import MatrixSimilarity, SparseMatrixSimilarity import gensim_build @@ -28,8 +26,8 @@ DRY_RUN = False # how many 'most similar' documents to store in each similar.xml? -MIN_SCORE = 0.0 # prune based on similarity score (all below MIN_SCORE are ignored) -MAX_SIMILAR = 10 # prune based on rank (at most MAX_SIMILAR are stored). set to 0 to store all of them (no limit). +MIN_SCORE = 0.0 # prune based on similarity score (all below MIN_SCORE are ignored) +MAX_SIMILAR = 10 # prune based on rank (at most MAX_SIMILAR are stored). set to 0 to store all of them (no limit). # if there are no similar articles (after the pruning), do we still want to generate similar.xml? 
SAVE_EMPTY = True @@ -55,29 +53,28 @@ """ - def generateSimilar(corpus, index, method): - for docNo, topSims in enumerate(index): # for each document + for docNo, topSims in enumerate(index): # for each document # store similarities to the following file outfile = os.path.join(corpus.articleDir(docNo), 'similar_%s.xml' % method) - articles = [] # collect similars in this list - for docNo2, score in topSims: # for each most similar article - if score > MIN_SCORE and docNo != docNo2: # if similarity is above MIN_SCORE and not identity (=always maximum similarity, boring) + articles = [] # collect similars in this list + for docNo2, score in topSims: # for each most similar article + if score > MIN_SCORE and docNo != docNo2: # if similarity is above MIN_SCORE and not identity (=always maximum similarity, boring) source, (intId, pathId) = corpus.documents[docNo2] meta = corpus.getMeta(docNo2) suffix, author, title = '', meta.get('author', ''), meta.get('title', '') - articles.append(ARTICLE % locals()) # add the similar article to output + articles.append(ARTICLE % locals()) # add the similar article to output if len(articles) >= MAX_SIMILAR: break # now `articles` holds multiple strings in similar_*.xml format if SAVE_EMPTY or articles: - output = ''.join(articles) # concat all similars to one string - if not DRY_RUN: # only open output files for writing if DRY_RUN is false + output = ''.join(articles) # concat all similars to one string + if not DRY_RUN: # only open output files for writing if DRY_RUN is false logging.info("generating %s (%i similars)" % (outfile, len(articles))) outfile = open(outfile, 'w') - outfile.write(SIMILAR % output) # add xml headers and print to file + outfile.write(SIMILAR % output) # add xml headers and print to file outfile.close() else: logging.info("would be generating %s (%i similars):%s\n" % (outfile, len(articles), output)) @@ -85,7 +82,6 @@ def generateSimilar(corpus, index, method): logging.debug("skipping %s (no similar found)" % outfile) - if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) @@ -112,14 +108,13 @@ def generateSimilar(corpus, index, method): input = MmCorpus(config.resultFile('_%s.mm' % method)) assert len(input) == len(corpus), "corpus size mismatch (%i vs %i): run ./gensim_genmodel.py again" % (len(input), len(corpus)) - # initialize structure for similarity queries - if method == 'lsi' or method == 'rp': # for these methods, use dense vectors + # initialize structure for similarity queries + if method == 'lsi' or method == 'rp': # for these methods, use dense vectors index = MatrixSimilarity(input, numBest=MAX_SIMILAR + 1, numFeatures=input.numTerms) else: index = SparseMatrixSimilarity(input, numBest=MAX_SIMILAR + 1) - index.normalize = False # do not normalize query vectors during similarity queries (the index is already built normalized, so it would be a no-op) - generateSimilar(corpus, index, method) # for each document, print MAX_SIMILAR nearest documents to a xml file, in dml-cz specific format + index.normalize = False # do not normalize query vectors during similarity queries (the index is already built normalized, so it would be a no-op) + generateSimilar(corpus, index, method) # for each document, print MAX_SIMILAR nearest documents to a xml file, in dml-cz specific format logging.info("finished running %s" % program) - diff --git a/gensim/examples/dmlcz/sources.py b/gensim/examples/dmlcz/sources.py index f6244ad361..da4e0ac0b0 100644 --- 
a/gensim/examples/dmlcz/sources.py +++ b/gensim/examples/dmlcz/sources.py @@ -20,10 +20,13 @@ import os.path import re -import xml.sax # for parsing arxmliv articles +import xml.sax # for parsing arxmliv articles from gensim import utils +import sys +if sys.version_info[0] >= 3: + unicode = str PAT_TAG = re.compile('<(.*?)>(.*)') logger = logging.getLogger('gensim.corpora.sources') @@ -44,6 +47,7 @@ class ArticleSource(object): This class is just an ABC interface; see eg. DmlSource or ArxmlivSource classes for concrete instances. """ + def __init__(self, sourceId): self.sourceId = sourceId @@ -64,8 +68,7 @@ def tokenize(self, content): def normalizeWord(self, word): raise NotImplementedError('Abstract Base Class') -#endclass ArticleSource - +# endclass ArticleSource class DmlSource(ArticleSource): @@ -79,6 +82,7 @@ class DmlSource(ArticleSource): See the ArticleSource class for general info on sources. """ + def __init__(self, sourceId, baseDir): self.sourceId = sourceId self.baseDir = os.path.normpath(baseDir) @@ -94,12 +98,12 @@ def parseDmlMeta(cls, xmlfile): result = {} xml = open(xmlfile) for line in xml: - if line.find('
<article>') >= 0: # skip until the beginning of <article> tag
+            if line.find('<article>') >= 0:  # skip until the beginning of <article> tag
                 break
         for line in xml:
-            if line.find('</article>') >= 0: # end of <article>, we're done
+            if line.find('</article>') >= 0:  # end of <article>
, we're done break - p = re.search(PAT_TAG, line) # HAX assumes one element = one line; proper xml parsing probably better... but who cares + p = re.search(PAT_TAG, line) # HAX assumes one element = one line; proper xml parsing probably better... but who cares if p: name, cont = p.groups() name = name.split()[0] @@ -110,20 +114,18 @@ def parseDmlMeta(cls, xmlfile): result.setdefault('msc', []).append(cont) continue if name == 'idMR': - cont = cont[2:] # omit MR from MR123456 + cont = cont[2:] # omit MR from MR123456 if name and cont: result[name] = cont xml.close() return result - def idFromDir(self, path): assert len(path) > len(self.baseDir) - intId = path[1 + path.rfind('#') : ] - pathId = path[len(self.baseDir) + 1 : ] + intId = path[1 + path.rfind('#'):] + pathId = path[1 + len(self.baseDir):] return (intId, pathId) - def isArticle(self, path): # in order to be valid, the article directory must start with '#' if not os.path.basename(path).startswith('#'): @@ -138,7 +140,6 @@ def isArticle(self, path): return False return True - def findArticles(self): dirTotal = artAccepted = 0 logger.info("looking for '%s' articles inside %s" % (self.sourceId, self.baseDir)) @@ -151,7 +152,6 @@ def findArticles(self): logger.info('%i directories processed, found %i articles' % (dirTotal, artAccepted)) - def getContent(self, uri): """ Return article content as a single large string. @@ -160,7 +160,6 @@ def getContent(self, uri): filename = os.path.join(self.baseDir, pathId, 'fulltext.txt') return open(filename).read() - def getMeta(self, uri): """ Return article metadata as a attribute->value dictionary. @@ -169,15 +168,13 @@ def getMeta(self, uri): filename = os.path.join(self.baseDir, pathId, 'meta.xml') return DmlSource.parseDmlMeta(filename) - def tokenize(self, content): - return [token.encode('utf8') for token in utils.tokenize(content, errors = 'ignore') if not token.isdigit()] - + return [token.encode('utf8') for token in utils.tokenize(content, errors='ignore') if not token.isdigit()] def normalizeWord(self, word): wordU = unicode(word, 'utf8') - return wordU.lower().encode('utf8') # lowercase and then convert back to bytestring -#endclass DmlSource + return wordU.lower().encode('utf8') # lowercase and then convert back to bytestring +# endclass DmlSource class DmlCzSource(DmlSource): @@ -190,13 +187,13 @@ class DmlCzSource(DmlSource): See the ArticleSource class for general info on sources. """ + def idFromDir(self, path): assert len(path) > len(self.baseDir) dmlczId = open(os.path.join(path, 'dspace_id')).read().strip() - pathId = path[len(self.baseDir) + 1 : ] + pathId = path[1 + len(self.baseDir):] return (dmlczId, pathId) - def isArticle(self, path): # in order to be valid, the article directory must start with '#' if not os.path.basename(path).startswith('#'): @@ -215,7 +212,6 @@ def isArticle(self, path): return False return True - def getContent(self, uri): """ Return article content as a single large string. 
@@ -236,8 +232,7 @@ def getContent(self, uri): assert os.path.exists(filename2) filename = filename2 return open(filename).read() -#endclass DmlCzSource - +# endclass DmlCzSource class ArxmlivSource(ArticleSource): @@ -253,8 +248,8 @@ class ArxmlivSource(ArticleSource): """ class ArxmlivContentHandler(xml.sax.handler.ContentHandler): def __init__(self): - self.path = [''] # help structure for sax event parsing - self.tokens = [] # will contain tokens once parsing is finished + self.path = [''] # help structure for sax event parsing + self.tokens = [] # will contain tokens once parsing is finished def startElement(self, name, attr): # for math tokens, we only care about Math elements directly below
<p>
@@ -270,10 +265,9 @@ def endElement(self, name): def characters(self, text): # for text, we only care about tokens directly within the
<p>
tag if self.path[-1] == 'p': - tokens = [token.encode('utf8') for token in utils.tokenize(text, errors = 'ignore') if not token.isdigit()] + tokens = [token.encode('utf8') for token in utils.tokenize(text, errors='ignore') if not token.isdigit()] self.tokens.extend(tokens) - #endclass ArxmlivHandler - + # endclass ArxmlivHandler class ArxmlivErrorHandler(xml.sax.handler.ErrorHandler): # Python2.5 implementation of xml.sax is broken -- character streams and @@ -287,25 +281,21 @@ def error(self, exception): # logger.debug("SAX error parsing xml: %s" % exception) warning = fatalError = error - #endclass ArxmlivErrorHandler - + # endclass ArxmlivErrorHandler def __init__(self, sourceId, baseDir): self.sourceId = sourceId self.baseDir = os.path.normpath(baseDir) - def __str__(self): return self.sourceId - def idFromDir(self, path): assert len(path) > len(self.baseDir) - intId = path[1 + path.rfind('#') : ] - pathId = path[len(self.baseDir) + 1 : ] + intId = path[1 + path.rfind('#'):] + pathId = path[1 + len(self.baseDir):] return (intId, pathId) - def isArticle(self, path): # in order to be valid, the article directory must start with '#' if not os.path.basename(path).startswith('#'): @@ -316,7 +306,6 @@ def isArticle(self, path): return False return True - def findArticles(self): dirTotal = artAccepted = 0 logger.info("looking for '%s' articles inside %s" % (self.sourceId, self.baseDir)) @@ -329,7 +318,6 @@ def findArticles(self): logger.info('%i directories processed, found %i articles' % (dirTotal, artAccepted)) - def getContent(self, uri): """ Return article content as a single large string. @@ -338,15 +326,13 @@ def getContent(self, uri): filename = os.path.join(self.baseDir, pathId, 'tex.xml') return open(filename).read() - def getMeta(self, uri): """ Return article metadata as an attribute->value dictionary. """ # intId, pathId = uri # filename = os.path.join(self.baseDir, pathId, 'tex.xml') - return {'language': 'eng'} # TODO maybe parse out some meta; but currently not needed for anything... - + return {'language': 'eng'} # TODO maybe parse out some meta; but currently not needed for anything... def tokenize(self, content): """ @@ -361,12 +347,9 @@ def tokenize(self, content): xml.sax.parseString(content, handler, ArxmlivSource.ArxmlivErrorHandler()) return handler.tokens - def normalizeWord(self, word): - if word[0] == '$': # ignore math tokens + if word[0] == '$': # ignore math tokens return word wordU = unicode(word, 'utf8') - return wordU.lower().encode('utf8') # lowercase and then convert back to bytestring -#endclass ArxmlivSource - - + return wordU.lower().encode('utf8') # lowercase and then convert back to bytestring +# endclass ArxmlivSource diff --git a/gensim/interfaces.py b/gensim/interfaces.py index 58e6f45b13..4087fd8893 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -14,7 +14,6 @@ from __future__ import with_statement import logging -import itertools from gensim import utils, matutils from six.moves import xrange @@ -48,13 +47,13 @@ class CorpusABC(utils.SaveLoad): state, and **not** the documents themselves. See the `save_corpus` static method for serializing the actual stream content. """ + def __iter__(self): """ Iterate over the corpus, yielding one document at a time. 
""" raise NotImplementedError('cannot instantiate abstract base class') - def save(self, *args, **kwargs): import warnings warnings.warn("corpus.save() stores only the (tiny) iteration object; " @@ -98,16 +97,16 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): # example code: logger.info("converting corpus to ??? format: %s" % fname) with utils.smart_open(fname, 'wb') as fout: - for doc in corpus: # iterate over the document stream - fmt = str(doc) # format the document appropriately... - fout.write(utils.to_utf8("%s\n" % fmt)) # serialize the formatted document to disk -#endclass CorpusABC + for doc in corpus: # iterate over the document stream + fmt = str(doc) # format the document appropriately... + fout.write(utils.to_utf8("%s\n" % fmt)) # serialize the formatted document to disk +# endclass CorpusABC class TransformedCorpus(CorpusABC): def __init__(self, obj, corpus, chunksize=None, **kwargs): self.obj, self.corpus, self.chunksize = obj, corpus, chunksize - for key, value in kwargs.items(): #add the new parameters like per_word_topics to base class object of LdaModel + for key, value in kwargs.items(): # add the new parameters like per_word_topics to base class object of LdaModel setattr(self.obj, key, value) self.metadata = False @@ -125,10 +124,10 @@ def __iter__(self): def __getitem__(self, docno): if hasattr(self.corpus, '__getitem__'): - return self.obj[self.corpus[docno]] + return self.obj[self.corpus[docno]] else: raise RuntimeError('Type {} does not support slicing.'.format(type(self.corpus))) -#endclass TransformedCorpus +# endclass TransformedCorpus class TransformationABC(utils.SaveLoad): @@ -157,14 +156,13 @@ def __getitem__(self, vec): """ raise NotImplementedError('cannot instantiate abstract base class') - def _apply(self, corpus, chunksize=None, **kwargs): """ Apply the transformation to a whole corpus (as opposed to a single document) and return the result as another corpus. """ return TransformedCorpus(self, corpus, chunksize, **kwargs) -#endclass TransformationABC +# endclass TransformationABC class SimilarityABC(utils.SaveLoad): @@ -183,16 +181,15 @@ class SimilarityABC(utils.SaveLoad): similarities of each document in the corpus against the whole corpus (ie., the query is each corpus document in turn). """ + def __init__(self, corpus): raise NotImplementedError("cannot instantiate Abstract Base Class") - def get_similarities(self, doc): # (Sparse)MatrixSimilarity override this method so that they both use the # same __getitem__ method, defined below raise NotImplementedError("cannot instantiate Abstract Base Class") - def __getitem__(self, query): """Get similarities of document `query` to all documents in the corpus. @@ -210,7 +207,7 @@ def __getitem__(self, query): # as well, but in that case assume tricks are happening and don't normalize # anything (self.normalize has no effect). if matutils.ismatrix(query): - import warnings + import warnings # noqa:F401 # warnings.warn("non-gensim input must already come normalized") else: if is_corpus: @@ -225,7 +222,7 @@ def __getitem__(self, query): # if maintain_sparity is True, result is scipy sparse. Sort, clip the # topn and return as a scipy sparse matrix. 
if getattr(self, 'maintain_sparsity', False): - return matutils.scipy2scipy_clipped(result, self.num_best) + return matutils.scipy2scipy_clipped(result, self.num_best) # if the input query was a corpus (=more documents), compute the top-n # most similar for each document in turn @@ -235,7 +232,6 @@ def __getitem__(self, query): # otherwise, return top-n of the single input document return matutils.full2sparse_clipped(result, self.num_best) - def __iter__(self): """ For each index document, compute cosine similarity against all other @@ -276,4 +272,4 @@ def __iter__(self): # restore old normalization value self.normalize = norm -#endclass SimilarityABC +# endclass SimilarityABC diff --git a/gensim/matutils.py b/gensim/matutils.py index a8e427eb50..58904e1657 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -217,6 +217,7 @@ class Scipy2Corpus(object): This is the mirror function to `corpus2csc`. """ + def __init__(self, vecs): """ `vecs` is a sequence of dense and/or sparse vectors, such as a 2d np array, @@ -267,6 +268,7 @@ def full2sparse(vec, eps=1e-9): nnz = np.nonzero(abs(vec) > eps)[0] return list(zip(nnz, vec.take(nnz))) + dense2vec = full2sparse @@ -318,6 +320,7 @@ class Dense2Corpus(object): This is the mirror function to `corpus2dense`. """ + def __init__(self, dense, documents_columns=True): if documents_columns: self.dense = dense.T @@ -330,7 +333,7 @@ def __iter__(self): def __len__(self): return len(self.dense) -#endclass DenseCorpus +# endclass DenseCorpus class Sparse2Corpus(object): @@ -340,6 +343,7 @@ class Sparse2Corpus(object): This is the mirror function to `corpus2csc`. """ + def __init__(self, sparse, documents_columns=True): if documents_columns: self.sparse = sparse.tocsc() @@ -352,7 +356,7 @@ def __iter__(self): def __len__(self): return self.sparse.shape[1] -#endclass Sparse2Corpus +# endclass Sparse2Corpus def veclen(vec): @@ -431,7 +435,7 @@ def unitvec(vec, norm='l2'): try: first = next(iter(vec)) # is there at least one element? - except: + except Exception: return vec if isinstance(first, (tuple, list)) and len(first) == 2: # gensim sparse format @@ -544,7 +548,7 @@ def hellinger(vec1, vec2): vec1, vec2 = dict(vec1), dict(vec2) if len(vec2) < len(vec1): vec1, vec2 = vec2, vec1 # swap references so that we iterate over the shorter vector - sim = np.sqrt(0.5*sum((np.sqrt(value) - np.sqrt(vec2.get(index, 0.0)))**2 for index, value in iteritems(vec1))) + sim = np.sqrt(0.5 * sum((np.sqrt(value) - np.sqrt(vec2.get(index, 0.0)))**2 for index, value in iteritems(vec1))) return sim else: sim = np.sqrt(0.5 * ((np.sqrt(vec1) - np.sqrt(vec2))**2).sum()) @@ -778,7 +782,7 @@ def close(self): logger.debug("closing %s" % self.fname) if hasattr(self, 'fout'): self.fout.close() -#endclass MmWriter +# endclass MmWriter class MmReader(object): @@ -790,6 +794,7 @@ class MmReader(object): matrix at once (unlike scipy.io.mmread). This allows us to process corpora which are larger than the available RAM. """ + def __init__(self, input, transposed=True): """ Initialize the matrix reader. 
@@ -864,7 +869,7 @@ def __iter__(self): if docid != previd: # change of document: return the document read so far (its id is prevId) if previd >= 0: - yield previd, document + yield previd, document # noqa:F821 # return implicit (empty) documents between previous id and new id # too, to keep consistent document numbering and corpus length @@ -912,4 +917,4 @@ def docbyoffset(self, offset): document.append((termid, val,)) # add another field to the current document return document -#endclass MmReader +# endclass MmReader diff --git a/gensim/models/__init__.py b/gensim/models/__init__.py index 47638c6b41..5c25a86fd5 100644 --- a/gensim/models/__init__.py +++ b/gensim/models/__init__.py @@ -4,23 +4,23 @@ """ # bring model classes directly into package namespace, to save some typing -from .coherencemodel import CoherenceModel -from .hdpmodel import HdpModel -from .ldamodel import LdaModel -from .lsimodel import LsiModel -from .tfidfmodel import TfidfModel -from .rpmodel import RpModel -from .logentropy_model import LogEntropyModel -from .word2vec import Word2Vec -from .doc2vec import Doc2Vec -from .keyedvectors import KeyedVectors -from .ldamulticore import LdaMulticore -from .phrases import Phrases -from .normmodel import NormModel -from .atmodel import AuthorTopicModel -from .ldaseqmodel import LdaSeqModel - -from . import wrappers +from .coherencemodel import CoherenceModel # noqa:F401 +from .hdpmodel import HdpModel # noqa:F401 +from .ldamodel import LdaModel # noqa:F401 +from .lsimodel import LsiModel # noqa:F401 +from .tfidfmodel import TfidfModel # noqa:F401 +from .rpmodel import RpModel # noqa:F401 +from .logentropy_model import LogEntropyModel # noqa:F401 +from .word2vec import Word2Vec # noqa:F401 +from .doc2vec import Doc2Vec # noqa:F401 +from .keyedvectors import KeyedVectors # noqa:F401 +from .ldamulticore import LdaMulticore # noqa:F401 +from .phrases import Phrases # noqa:F401 +from .normmodel import NormModel # noqa:F401 +from .atmodel import AuthorTopicModel # noqa:F401 +from .ldaseqmodel import LdaSeqModel # noqa:F401 + +from . import wrappers # noqa:F401 from gensim import interfaces, utils @@ -42,12 +42,12 @@ class VocabTransform(interfaces.TransformationABC): >>> ... """ + def __init__(self, old2new, id2token=None): # id2word = dict((newid, oldid2word[oldid]) for oldid, newid in old2new.iteritems()) self.old2new = old2new self.id2token = id2token - def __getitem__(self, bow): """ Return representation with the ids transformed. @@ -58,4 +58,4 @@ def __getitem__(self, bow): return self._apply(bow) return sorted((self.old2new[oldid], weight) for oldid, weight in bow if oldid in self.old2new) -#endclass VocabTransform +# endclass VocabTransform diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py index 335f2af7f0..7e1ee02d62 100755 --- a/gensim/models/atmodel.py +++ b/gensim/models/atmodel.py @@ -64,6 +64,7 @@ class AuthorTopicState(LdaState): reduce traffic. """ + def __init__(self, eta, lambda_shape, gamma_shape): self.eta = eta self.sstats = np.zeros(lambda_shape) @@ -361,8 +362,8 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c """ try: - _ = len(chunk) - except: + _ = len(chunk) # noqa:F841 + except Exception: # convert iterators/generators to plain list, so we have len() etc. 
chunk = list(chunk) if len(chunk) > 1: @@ -596,7 +597,7 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None, try: len_input_corpus = len(corpus) - except: + except Exception: logger.warning("input corpus stream has no len(); counting documents") len_input_corpus = sum(1 for _ in corpus) if len_input_corpus == 0: diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py index 95a5117eee..4fa80ea15e 100644 --- a/gensim/models/coherencemodel.py +++ b/gensim/models/coherencemodel.py @@ -108,6 +108,7 @@ class CoherenceModel(interfaces.TransformationABC): Model persistency is achieved via its load/save methods. """ + def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary=None, window_size=None, coherence='c_v', topn=10, processes=-1): """ diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index f859e261a2..5e0dc20418 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -48,7 +48,7 @@ try: from queue import Queue except ImportError: - from Queue import Queue + from Queue import Queue # noqa:F401 from collections import namedtuple, defaultdict from timeit import default_timer @@ -158,7 +158,7 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N word2_indexes = [word2.index for pos2, word2 in window_pos if pos2 != pos] l1 = np_sum(word_vectors[word2_indexes], axis=0) + np_sum(doctag_vectors[doctag_indexes], axis=0) count = len(word2_indexes) + len(doctag_indexes) - if model.cbow_mean and count > 1 : + if model.cbow_mean and count > 1: l1 /= count neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha, learn_vectors=False, learn_hidden=learn_hidden) @@ -223,7 +223,6 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, padded_document_indexes[(pos - pre_pad_count): pos] # preceding words + padded_document_indexes[(pos + 1):(pos + 1 + post_pad_count)] # following words ) - word_context_len = len(word_context_indexes) predict_word = model.wv.vocab[model.wv.index2word[padded_document_indexes[pos]]] # numpy advanced-indexing copies; concatenate, flatten to 1d l1 = concatenate((doctag_vectors[doctag_indexes], word_vectors[word_context_indexes])).ravel() @@ -253,6 +252,7 @@ class TaggedDocument(namedtuple('TaggedDocument', 'words tags')): Replaces "sentence as a list of words" from Word2Vec. """ + def __str__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags) @@ -288,6 +288,7 @@ class DocvecsArray(utils.SaveLoad): implementation, based on another persistence mechanism like LMDB, LevelDB, or SQLite, should also be possible. 
""" + def __init__(self, mapfile_path=None): self.doctags = {} # string -> Doctag (only filled if necessary) self.max_rawint = -1 # highest rawint-indexed doctag @@ -381,9 +382,9 @@ def estimated_lookup_memory(self): def reset_weights(self, model): length = max(len(self.doctags), self.count) if self.mapfile_path: - self.doctag_syn0 = np_memmap(self.mapfile_path+'.doctag_syn0', dtype=REAL, + self.doctag_syn0 = np_memmap(self.mapfile_path + '.doctag_syn0', dtype=REAL, mode='w+', shape=(length, model.vector_size)) - self.doctag_syn0_lockf = np_memmap(self.mapfile_path+'.doctag_syn0_lockf', dtype=REAL, + self.doctag_syn0_lockf = np_memmap(self.mapfile_path + '.doctag_syn0_lockf', dtype=REAL, mode='w+', shape=(length,)) self.doctag_syn0_lockf.fill(1.0) else: @@ -416,7 +417,7 @@ def init_sims(self, replace=False): else: if self.mapfile_path: self.doctag_syn0norm = np_memmap( - self.mapfile_path+'.doctag_syn0norm', dtype=REAL, + self.mapfile_path + '.doctag_syn0norm', dtype=REAL, mode='w+', shape=self.doctag_syn0.shape) else: self.doctag_syn0norm = empty(self.doctag_syn0.shape, dtype=REAL) @@ -549,6 +550,7 @@ def repeat(self, word_count): class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" + def __init__(self, documents=None, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, docvecs=None, docvecs_mapfile=None, comment=None, trim_rule=None, **kwargs): @@ -863,6 +865,7 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='* class TaggedBrownCorpus(object): """Iterate over documents from the Brown corpus (part of NLTK data), yielding each document out as a TaggedDocument object.""" + def __init__(self, dirname): self.dirname = dirname @@ -888,6 +891,7 @@ class TaggedLineDocument(object): Words are expected to be already preprocessed and separated by whitespace, tags are constructed automatically from the document line number.""" + def __init__(self, source): """ `source` can be either a string (filename) or a file object. diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index 46995549f8..b26c8fa639 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -50,6 +50,7 @@ meanchangethresh = 0.00001 rhot_bound = 0.0 + def expect_log_sticks(sticks): """ For stick-breaking hdp, return the E[log(sticks)] @@ -121,6 +122,7 @@ class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): Model persistency is achieved through its `load`/`save` methods. 
""" + def __init__(self, corpus, id2word, max_chunks=None, max_time=None, chunksize=256, kappa=1.0, tau=64.0, K=15, T=150, alpha=1, gamma=1, eta=0.01, scale=1.0, var_converge=0.0001, @@ -302,7 +304,7 @@ def doc_e_step(self, doc, ss, Elogsticks_1st, word_list, chunkids = [unique_words[id] for id in doc_word_ids] Elogbeta_doc = self.m_Elogbeta[:, doc_word_ids] - ## very similar to the hdp equations + # very similar to the hdp equations v = np.zeros((2, self.m_K - 1)) v[0] = 1.0 v[1] = self.m_alpha @@ -313,21 +315,20 @@ def doc_e_step(self, doc, ss, Elogsticks_1st, word_list, likelihood = 0.0 old_likelihood = -1e200 converge = 1.0 - eps = 1e-100 iter = 0 max_iter = 100 # not yet support second level optimization yet, to be done in the future while iter < max_iter and (converge < 0.0 or converge > var_converge): - ### update variational parameters + # update variational parameters # var_phi if iter < 3: - var_phi = np.dot(phi.T, (Elogbeta_doc * doc_word_counts).T) + var_phi = np.dot(phi.T, (Elogbeta_doc * doc_word_counts).T) (log_var_phi, log_norm) = matutils.ret_log_normalize_vec(var_phi) var_phi = np.exp(log_var_phi) else: - var_phi = np.dot(phi.T, (Elogbeta_doc * doc_word_counts).T) + Elogsticks_1st + var_phi = np.dot(phi.T, (Elogbeta_doc * doc_word_counts).T) + Elogsticks_1st (log_var_phi, log_norm) = matutils.ret_log_normalize_vec(var_phi) var_phi = np.exp(log_var_phi) @@ -337,7 +338,7 @@ def doc_e_step(self, doc, ss, Elogsticks_1st, word_list, (log_phi, log_norm) = matutils.ret_log_normalize_vec(phi) phi = np.exp(log_phi) else: - phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd + phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd # noqa:F821 (log_phi, log_norm) = matutils.ret_log_normalize_vec(phi) phi = np.exp(log_phi) @@ -406,7 +407,7 @@ def update_lambda(self, sstats, word_list, opt_o): if opt_o: self.optimal_ordering() - ## update top level sticks + # update top level sticks self.m_var_sticks[0] = self.m_varphi_ss[:self.m_T - 1] + 1.0 var_phi_sum = np.flipud(self.m_varphi_ss[1:]) self.m_var_sticks[1] = np.flipud(np.cumsum(var_phi_sum)) + self.m_gamma @@ -529,7 +530,7 @@ def hdp_to_lda(self): alpha = alpha * self.m_alpha # beta - beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + \ + beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis]) return (alpha, beta) @@ -564,7 +565,7 @@ def evaluate_test_corpus(self, corpus): total_words += sum(doc_word_counts) logger.info('TEST: average score: %.5f, total score: %.5f, test docs: %d' % (score / total_words, score, len(corpus))) return score -#endclass HdpModel +# endclass HdpModel class HdpTopicFormatter(object): @@ -624,14 +625,14 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): return shown - def print_topic(self, topic_id, topn= None, num_words=None): + def print_topic(self, topic_id, topn=None, num_words=None): if num_words is not None: # deprecated num_words is used warnings.warn("The parameter num_words for print_topic() would be deprecated in the updated version. Please use topn instead.") topn = num_words return self.show_topic(topic_id, topn, formatted=True) - def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= None,): + def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None,): if num_words is not None: # deprecated num_words is used warnings.warn("The parameter num_words for show_topic() would be deprecated in the updated version. 
Please use topn instead.") topn = num_words @@ -656,7 +657,6 @@ def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= N # we only return the topic_terms return topic[1] - def show_topic_terms(self, topic_data, num_words): return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:num_words]] diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 3568f43ab5..c2dfd419cb 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -59,7 +59,7 @@ try: from queue import Queue, Empty except ImportError: - from Queue import Queue, Empty + from Queue import Queue, Empty # noqa:F401 # If pyemd C extension is available, import it. # If pyemd is attempted to be used, but isn't installed, ImportError will be raised in wmdistance @@ -69,9 +69,9 @@ except ImportError: PYEMD_EXT = False -from numpy import exp, log, dot, zeros, outer, random, dtype, float32 as REAL,\ - double, uint32, seterr, array, uint8, vstack, fromstring, sqrt, newaxis,\ - ndarray, empty, sum as np_sum, prod, ones, ascontiguousarray +from numpy import dot, zeros, dtype, float32 as REAL,\ + double, array, vstack, fromstring, sqrt, newaxis,\ + ndarray, sum as np_sum, prod, ascontiguousarray from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from gensim.corpora.dictionary import Dictionary @@ -94,6 +94,7 @@ class Vocab(object): and for constructing binary trees (incl. both word leaves and inner nodes). """ + def __init__(self, **kwargs): self.count = 0 self.__dict__.update(kwargs) @@ -111,6 +112,7 @@ class KeyedVectors(utils.SaveLoad): Class to contain vectors and vocab for the Word2Vec training class and other w2v methods not directly involved in training such as most_similar() """ + def __init__(self): self.syn0 = [] self.syn0norm = None @@ -160,7 +162,6 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None) else: fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row)))) - @classmethod def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict', limit=None, datatype=REAL): @@ -422,7 +423,7 @@ def wmdistance(self, document1, document2): distance_matrix = zeros((vocab_len, vocab_len), dtype=double) for i, t1 in dictionary.items(): for j, t2 in dictionary.items(): - if not t1 in docset1 or not t2 in docset2: + if t1 not in docset1 or t2 not in docset2: continue # Compute Euclidean distance between word vectors. distance_matrix[i, j] = sqrt(np_sum((self[t1] - self[t2])**2)) @@ -480,7 +481,7 @@ def most_similar_cosmul(self, positive=None, negative=None, topn=10): # allow calls like most_similar_cosmul('dog'), as a shorthand for most_similar_cosmul(['dog']) positive = [positive] - all_words = set([self.vocab[word].index for word in positive+negative + all_words = set([self.vocab[word].index for word in positive + negative if not isinstance(word, ndarray) and word in self.vocab]) positive = [ @@ -572,7 +573,6 @@ def doesnt_match(self, words): return sorted(zip(dists, used_words))[0][1] def __getitem__(self, words): - """ Accept a single word or a list of words as input. 
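# --- editor's sketch (illustrative addition, not part of the diff) ---
# Exercising the KeyedVectors query paths cleaned up here (__getitem__, most_similar,
# doesnt_match); wmdistance() additionally needs the optional `pyemd` package, per the
# import guard above. Parameter names follow the gensim 2.x API this diff targets.
from gensim.models import Word2Vec

sentences = [['human', 'computer', 'interface'],
             ['human', 'system', 'interface'],
             ['graph', 'minors', 'trees']]
w2v = Word2Vec(sentences, size=10, min_count=1, seed=1)

vec = w2v.wv['human']                                    # single word -> 1D vector
sims = w2v.wv.most_similar(positive=['human'], topn=2)   # cosine-similarity neighbours
odd = w2v.wv.doesnt_match(['human', 'interface', 'trees'])
# --- end of sketch ---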
@@ -692,7 +692,7 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c a, b, c, expected = [word.upper() for word in line.split()] else: a, b, c, expected = [word for word in line.split()] - except: + except Exception: logger.info("skipping invalid line #%i in %s" % (line_no, questions)) continue if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: @@ -783,7 +783,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case else: a, b, sim = [word for word in line.split(delimiter)] sim = float(sim) - except: + except Exception: logger.info('skipping invalid line #%d in %s', line_no, pairs) continue if a not in ok_vocab or b not in ok_vocab: @@ -814,7 +814,6 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case self.log_evaluate_word_pairs(pearson, spearman, oov_ratio, pairs) return pearson, spearman, oov_ratio - def init_sims(self, replace=False): """ Precompute L2-normalized vectors. diff --git a/gensim/models/lda_dispatcher.py b/gensim/models/lda_dispatcher.py index ea54a9c18b..91e7f237c7 100755 --- a/gensim/models/lda_dispatcher.py +++ b/gensim/models/lda_dispatcher.py @@ -44,7 +44,7 @@ # timeout for the Queue object put/get blocking methods. # it should theoretically be infinity, but then keyboard interrupts don't work. # so this is really just a hack, see http://bugs.python.org/issue1360 -HUGE_TIMEOUT = 365 * 24 * 60 * 60 # one year +HUGE_TIMEOUT = 365 * 24 * 60 * 60 # one year LDA_DISPATCHER_PREFIX = 'gensim.lda_dispatcher' @@ -123,7 +123,7 @@ def getstate(self): logger.info("end of input, assigning all remaining jobs") logger.debug("jobs done: %s, jobs received: %s" % (self._jobsdone, self._jobsreceived)) while self._jobsdone < self._jobsreceived: - time.sleep(0.5) # check every half a second + time.sleep(0.5) # check every half a second logger.info("merging states from %i workers" % len(self.workers)) workers = list(self.workers.values()) @@ -159,14 +159,12 @@ def jobdone(self, workerid): """ self._jobsdone += 1 logger.info("worker #%s finished job #%i" % (workerid, self._jobsdone)) - self.workers[workerid].requestjob() # tell the worker to ask for another job, asynchronously (one-way) - + self.workers[workerid].requestjob() # tell the worker to ask for another job, asynchronously (one-way) def jobsdone(self): """Wrap self._jobsdone, needed for remote access through Pyro proxies""" return self._jobsdone - @Pyro4.oneway def exit(self): """ @@ -176,8 +174,8 @@ def exit(self): logger.info("terminating worker %s" % workerid) worker.exit() logger.info("terminating dispatcher") - os._exit(0) # exit the whole process (not just this thread ala sys.exit()) -#endclass Dispatcher + os._exit(0) # exit the whole process (not just this thread ala sys.exit()) +# endclass Dispatcher def main(): diff --git a/gensim/models/lda_worker.py b/gensim/models/lda_worker.py index fbae4c0fff..ec87c29148 100755 --- a/gensim/models/lda_worker.py +++ b/gensim/models/lda_worker.py @@ -46,8 +46,8 @@ def __init__(self): @Pyro4.expose def initialize(self, myid, dispatcher, **model_params): self.lock_update = threading.Lock() - self.jobsdone = 0 # how many jobs has this worker completed? - self.myid = myid # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? + self.jobsdone = 0 # how many jobs has this worker completed? + self.myid = myid # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? 
self.dispatcher = dispatcher self.finished = False logger.info("initializing worker #%s" % myid) @@ -76,7 +76,6 @@ def requestjob(self): else: logger.info("worker #%i stopping asking for jobs" % self.myid) - @utils.synchronous('lock_update') def processjob(self, job): logger.debug("starting to process job #%i" % self.jobsdone) @@ -94,7 +93,7 @@ def getstate(self): (self.myid, self.jobsdone)) result = self.model.state assert isinstance(result, ldamodel.LdaState) - self.model.clear() # free up mem in-between two EM cycles + self.model.clear() # free up mem in-between two EM cycles self.finished = True return result @@ -108,12 +107,11 @@ def reset(self, state): self.model.state.reset() self.finished = False - @Pyro4.oneway def exit(self): logger.info("terminating worker #%i" % self.myid) os._exit(0) -#endclass Worker +# endclass Worker def main(): diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 4dc1a024b8..9ed01d84c8 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -33,7 +33,6 @@ import logging import numbers import os -from random import sample import numpy as np import six @@ -92,6 +91,7 @@ class LdaState(utils.SaveLoad): reduce traffic. """ + def __init__(self, eta, shape): self.eta = eta self.sstats = np.zeros(shape) @@ -191,6 +191,7 @@ class LdaModel(interfaces.TransformationABC, basemodel.BaseTopicModel): Model persistency is achieved through its `load`/`save` methods. """ + def __init__(self, corpus=None, num_topics=100, id2word=None, distributed=False, chunksize=2000, passes=1, update_every=1, alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10, @@ -412,7 +413,7 @@ def inference(self, chunk, collect_sstats=False): """ try: _ = len(chunk) - except: + except Exception: # convert iterators/generators to plain list, so we have len() etc. chunk = list(chunk) if len(chunk) > 1: @@ -588,7 +589,7 @@ def update(self, corpus, chunksize=None, decay=None, offset=None, try: lencorpus = len(corpus) - except: + except Exception: logger.warning("input corpus stream has no len(); counting documents") lencorpus = sum(1 for _ in corpus) if lencorpus == 0: diff --git a/gensim/models/ldamulticore.py b/gensim/models/ldamulticore.py index e6cdd4ccec..39c3c40666 100644 --- a/gensim/models/ldamulticore.py +++ b/gensim/models/ldamulticore.py @@ -77,6 +77,7 @@ class LdaMulticore(LdaModel): Model persistency is achieved through its `load`/`save` methods. """ + def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None, chunksize=2000, passes=1, batch=False, alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10, iterations=50, @@ -125,7 +126,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None, `decay` and `offset` parameters are the same as Kappa and Tau_0 in Hoffman et al, respectively. 
- + `random_state` can be a numpy.random.RandomState object or the seed for one Example: @@ -145,10 +146,9 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None, super(LdaMulticore, self).__init__(corpus=corpus, num_topics=num_topics, id2word=id2word, chunksize=chunksize, passes=passes, alpha=alpha, eta=eta, decay=decay, offset=offset, eval_every=eval_every, iterations=iterations, - gamma_threshold=gamma_threshold, random_state=random_state, minimum_probability= minimum_probability, + gamma_threshold=gamma_threshold, random_state=random_state, minimum_probability=minimum_probability, minimum_phi_value=minimum_phi_value, per_word_topics=per_word_topics) - def update(self, corpus, chunks_as_numpy=False): """ Train the model with new documents, by EM-iterating over `corpus` until @@ -169,7 +169,7 @@ def update(self, corpus, chunks_as_numpy=False): """ try: lencorpus = len(corpus) - except: + except Exception: logger.warning("input corpus stream has no len(); counting documents") lencorpus = sum(1 for _ in corpus) if lencorpus == 0: @@ -249,7 +249,7 @@ def process_result_queue(force=False): process_result_queue() process_result_queue() - #endfor single corpus pass + # endfor single corpus pass # wait for all outstanding jobs to finish while queue_size[0] > 0: @@ -257,7 +257,7 @@ def process_result_queue(force=False): if reallen != lencorpus: raise RuntimeError("input corpus size changed during training (don't use generators as input)") - #endfor entire update + # endfor entire update pool.terminate() diff --git a/gensim/models/ldaseqmodel.py b/gensim/models/ldaseqmodel.py index f6a512e3b4..6399e17aae 100644 --- a/gensim/models/ldaseqmodel.py +++ b/gensim/models/ldaseqmodel.py @@ -32,6 +32,7 @@ logger = logging.getLogger('gensim.models.ldaseqmodel') + class LdaSeqModel(utils.SaveLoad): """ The constructor estimates Dynamic Topic Model parameters based @@ -90,7 +91,7 @@ def __init__(self, corpus=None, time_slice=None, id2word=None, alphas=0.01, num_ if corpus is not None: try: self.corpus_len = len(corpus) - except: + except Exception: logger.warning("input corpus stream has no len(); counting documents") self.corpus_len = sum(1 for _ in corpus) @@ -137,7 +138,6 @@ def __init__(self, corpus=None, time_slice=None, id2word=None, alphas=0.01, num_ # fit DTM self.fit_lda_seq(corpus, lda_inference_max_iter, em_min_iter, em_max_iter, chunksize) - def init_ldaseq_ss(self, topic_chain_variance, topic_obs_variance, alpha, init_suffstats): """ Method to initialize State Space Language Model, topic wise. @@ -152,7 +152,6 @@ def init_ldaseq_ss(self, topic_chain_variance, topic_obs_variance, alpha, init_s # ldaseq.topic_chains[k].w_phi_sum = np.zeros((ldaseq.vocab_len, ldaseq.num_time_slices)) # ldaseq.topic_chains[k].w_phi_sq = np.zeros((ldaseq.vocab_len, ldaseq.num_time_slices)) - def fit_lda_seq(self, corpus, lda_inference_max_iter, em_min_iter, em_max_iter, chunksize): """ fit an lda sequence model: @@ -228,7 +227,6 @@ def fit_lda_seq(self, corpus, lda_inference_max_iter, em_min_iter, em_max_iter, return bound - def lda_seq_infer(self, corpus, topic_suffstats, gammas, lhoods, iter_, lda_inference_max_iter, chunksize): """ Inference or E- Step. 
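# --- editor's sketch (illustrative addition, not part of the diff) ---
# Constructing the LdaSeqModel whose E-step and fitting methods are tidied here;
# `time_slice` lists how many documents fall into each consecutive time period.
from gensim.corpora import Dictionary
from gensim.models.ldaseqmodel import LdaSeqModel

texts = [['bank', 'river', 'shore'], ['bank', 'loan', 'money'],
         ['river', 'water', 'flow'], ['loan', 'interest', 'money']]
dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

ldaseq = LdaSeqModel(corpus=corpus, id2word=dictionary, time_slice=[2, 2], num_topics=2)
print(ldaseq.print_topics(time=0, top_terms=5))   # per-slice topics, see print_topics() below
# --- end of sketch ---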
@@ -252,17 +250,15 @@ def lda_seq_infer(self, corpus, topic_suffstats, gammas, lhoods, iter_, lda_infe return bound, gammas - def inferDTMseq(self, corpus, topic_suffstats, gammas, lhoods, lda, ldapost, iter_, bound, lda_inference_max_iter, chunksize): """ Computes the likelihood of a sequential corpus under an LDA seq model, and return the likelihood bound. Need to pass the LdaSeq model, corpus, sufficient stats, gammas and lhoods matrices previously created, and LdaModel and LdaPost class objects. """ - doc_index = 0 # overall doc_index in corpus - time = 0 # current time-slice - doc_num = 0 # doc-index in current time-lice - num_topics = self.num_topics + doc_index = 0 # overall doc_index in corpus + time = 0 # current time-slice + doc_num = 0 # doc-index in current time-slice lda = self.make_lda_seq_slice(lda, time) # create lda_seq slice time_slice = np.cumsum(np.array(self.time_slice)) @@ -299,7 +295,6 @@ def inferDTMseq(self, corpus, topic_suffstats, gammas, lhoods, lda, ldapost, ite return bound, gammas - def make_lda_seq_slice(self, lda, time): """ set up the LDA model topic-word values with that of ldaseq. @@ -310,7 +305,6 @@ def make_lda_seq_slice(self, lda, time): lda.alpha = np.copy(self.alphas) return lda - def fit_lda_seq_topics(self, topic_suffstats): """ Fit lda sequence topic wise. @@ -325,7 +319,6 @@ def fit_lda_seq_topics(self, topic_suffstats): return lhood - def print_topic_times(self, topic, top_terms=20): """ Prints one topic showing each time-slice. @@ -336,17 +329,15 @@ def print_topic_times(self, topic, top_terms=20): return topics - def print_topics(self, time=0, top_terms=20): """ Prints all topics in a particular time-slice. """ - topics =[] + topics = [] for topic in range(0, self.num_topics): topics.append(self.print_topic(topic, time, top_terms)) return topics - def print_topic(self, topic, time=0, top_terms=20): """ Topic is the topic number @@ -361,7 +352,6 @@ def print_topic(self, topic, time=0, top_terms=20): beststr = [(self.id2word[id_], topic[id_]) for id_ in bestn] return beststr - def doc_topics(self, doc_number): """ On passing the LdaSeqModel trained ldaseq object, the doc_number of your document in the corpus, @@ -371,7 +361,6 @@ def doc_topics(self, doc_number): doc_topic /= doc_topic.sum(axis=1)[:, np.newaxis] return doc_topic[doc_number] - def dtm_vis(self, time, corpus): """ returns term_frequency, vocab, doc_lengths, topic-term distributions and doc_topic distributions, specified by pyLDAvis format. @@ -395,7 +384,6 @@ def dtm_vis(self, time, corpus): # these should be passed to the `pyLDAvis.prepare` method to visualise one time-slice of DTM topics. return doc_topic, np.array(topic_term), doc_lengths, term_frequency, vocab - def dtm_coherence(self, time): """ returns all topics of a particular time-slice without probabilitiy values for it to be used @@ -441,11 +429,12 @@ class sslm(utils.SaveLoad): `fwd_mean`, `fwd_variance` are the forward posterior values. 
`zeta` is an extra variational parameter with a value for each time-slice """ + def __init__(self, vocab_len=None, num_time_slices=None, num_topics=None, obs_variance=0.5, chain_variance=0.005): self.vocab_len = vocab_len self.num_time_slices = num_time_slices self.obs_variance = obs_variance - self.chain_variance= chain_variance + self.chain_variance = chain_variance self.num_topics = num_topics # setting up matrices @@ -467,7 +456,6 @@ def __init__(self, vocab_len=None, num_time_slices=None, num_topics=None, obs_va self.w_phi_l_sq = None self.m_update_coeff_g = None - def update_zeta(self): """ Updates the Zeta Variational Parameter. @@ -478,7 +466,6 @@ def update_zeta(self): self.zeta[j] = np.sum(np.exp(self.mean[:, j + 1] + self.variance[:, j + 1] / 2)) return self.zeta - def compute_post_variance(self, word, chain_variance): """ Based on the Variational Kalman Filtering approach for Approximate Inference [https://www.cs.princeton.edu/~blei/papers/BleiLafferty2006a.pdf] @@ -514,12 +501,11 @@ def compute_post_variance(self, word, chain_variance): if fwd_variance[t] > 0.0: c = np.power((fwd_variance[t] / (fwd_variance[t] + chain_variance)), 2) else: - c = 0 + c = 0 variance[t] = (c * (variance[t + 1] - chain_variance)) + ((1 - c) * fwd_variance[t]) return variance, fwd_variance - def compute_post_mean(self, word, chain_variance): """ Based on the Variational Kalman Filtering approach for Approximate Inference [https://www.cs.princeton.edu/~blei/papers/BleiLafferty2006a.pdf] @@ -555,7 +541,6 @@ def compute_post_mean(self, word, chain_variance): mean[t] = c * fwd_mean[t] + (1 - c) * mean[t + 1] return mean, fwd_mean - def compute_expected_log_prob(self): """ Compute the expected log probability given values of m. @@ -566,7 +551,6 @@ def compute_expected_log_prob(self): self.e_log_prob[w][t] = self.mean[w][t + 1] - np.log(self.zeta[t]) return self.e_log_prob - def sslm_counts_init(self, obs_variance, chain_variance, sstats): """ Initialize State Space Language Model with LDA sufficient statistics. @@ -595,7 +579,6 @@ def sslm_counts_init(self, obs_variance, chain_variance, sstats): self.zeta = self.update_zeta() self.e_log_prob = self.compute_expected_log_prob() - def fit_sslm(self, sstats): """ Fits variational distribution. @@ -643,7 +626,6 @@ def fit_sslm(self, sstats): self.e_log_prob = self.compute_expected_log_prob() return bound - def compute_bound(self, sstats, totals): """ Compute log probability bound. @@ -694,7 +676,6 @@ def compute_bound(self, sstats, totals): return val - def update_obs(self, sstats, totals): """ Function to perform optimization of obs. Parameters are suff_stats set up in the fit_sslm method. @@ -758,7 +739,6 @@ def update_obs(self, sstats, totals): return self.obs, self.zeta - def compute_mean_deriv(self, word, time, deriv): """ Used in helping find the optimum function. @@ -791,7 +771,6 @@ def compute_mean_deriv(self, word, time, deriv): return deriv - def compute_obs_deriv(self, word, word_counts, totals, mean_deriv_mtx, deriv): """ Derivation of obs which is used in derivative function [df_obs] while optimizing. 
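# --- editor's sketch (illustrative addition, not part of the diff) ---
# The update_zeta() step shown earlier, written out standalone: for each time
# slice t, zeta_t = sum_w exp(mean[w, t + 1] + variance[w, t + 1] / 2). The
# mean/variance matrices carry one extra leading column for the chain's starting
# state, which is why the loop indexes j + 1.
import numpy as np

vocab_len, num_time_slices = 4, 3
mean = np.zeros((vocab_len, num_time_slices + 1))
variance = np.full((vocab_len, num_time_slices + 1), 0.1)

zeta = np.zeros(num_time_slices)
for j in range(num_time_slices):
    zeta[j] = np.sum(np.exp(mean[:, j + 1] + variance[:, j + 1] / 2))
# --- end of sketch ---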
@@ -824,7 +803,7 @@ def compute_obs_deriv(self, word, word_counts, totals, mean_deriv_mtx, deriv): for u in range(1, T + 1): mean_u = mean[u] - variance_u_prev = variance[u - 1] + variance_u_prev = variance[u - 1] # noqa:F841 mean_u_prev = mean[u - 1] dmean_u = mean_deriv[u] dmean_u_prev = mean_deriv[u - 1] @@ -848,6 +827,7 @@ def compute_obs_deriv(self, word, word_counts, totals, mean_deriv_mtx, deriv): return deriv # endclass sslm + class LdaPost(utils.SaveLoad): """ @@ -876,7 +856,6 @@ def __init__(self, doc=None, lda=None, max_doc_len=None, num_topics=None, gamma= self.doc_weight = None self.renormalized_doc_weight = None - def update_phi(self, doc_number, time): """ Update variational multinomial parameters, based on a document and a time-slice. @@ -910,18 +889,17 @@ def update_phi(self, doc_number, time): phi_row = np.exp(log_phi_row) self.log_phi[n] = log_phi_row self.phi[n] = phi_row - n +=1 # increase iteration + n += 1 # increase iteration return self.phi, self.log_phi - def update_gamma(self): """ update variational dirichlet parameters as described in the original Blei LDA paper: gamma = alpha + sum(phi), over every topic for every word. """ self.gamma = np.copy(self.lda.alpha) - n = 0 # keep track of number of iterations for phi, log_phi + n = 0 # keep track of number of iterations for phi, log_phi for word_id, count in self.doc: phi_row = self.phi[n] for k in range(0, self.lda.num_topics): @@ -930,18 +908,16 @@ def update_gamma(self): return self.gamma - def init_lda_post(self): """ Initialize variational posterior, does not return anything. """ total = sum(count for word_id, count in self.doc) self.gamma.fill(self.lda.alpha[0] + float(total) / self.lda.num_topics) - self.phi[:len(self.doc),:] = 1.0 / self.lda.num_topics + self.phi[:len(self.doc), :] = 1.0 / self.lda.num_topics # doc_weight used during DIM # ldapost.doc_weight = None - def compute_lda_lhood(self): """ compute the likelihood bound @@ -959,7 +935,7 @@ def compute_lda_lhood(self): # influence_term = 0 digsum = digamma(gamma_sum) - model = "DTM" + model = "DTM" # noqa:F841 for k in range(0, num_topics): # below code only to be used in DIM mode # if ldapost.doc_weight is not None and (model == "DIM" or model == "fixed"): @@ -1034,7 +1010,6 @@ def fit_lda_post(self, doc_number, time, ldaseq, LDA_INFERENCE_CONVERGED=1e-8, return lhood - def update_lda_seq_ss(self, time, doc, topic_suffstats): """ Update lda sequence sufficient statistics from an lda posterior. @@ -1085,7 +1060,7 @@ def f_obs(x, *args): for t in range(1, T + 1): mean_t = mean[t] mean_t_prev = mean[t - 1] - var_t_prev = variance[t - 1] + var_t_prev = variance[t - 1] # noqa:F841 val = mean_t - mean_t_prev term1 += val * val @@ -1107,8 +1082,8 @@ def f_obs(x, *args): return final -def df_obs(x, *args): +def df_obs(x, *args): """ Derivative of function which optimises obs. 
""" @@ -1121,6 +1096,6 @@ def df_obs(x, *args): if model == "DTM": deriv = sslm.compute_obs_deriv(word, word_counts, totals, mean_deriv_mtx, deriv) elif model == "DIM": - deriv = sslm.compute_obs_deriv_fixed(p.word, p.word_counts, p.totals, p.sslm, p.mean_deriv_mtx, deriv) + deriv = sslm.compute_obs_deriv_fixed(p.word, p.word_counts, p.totals, p.sslm, p.mean_deriv_mtx, deriv) # noqa:F821 return np.negative(deriv) diff --git a/gensim/models/lsi_dispatcher.py b/gensim/models/lsi_dispatcher.py index 8c4fb78dd3..5a69327522 100755 --- a/gensim/models/lsi_dispatcher.py +++ b/gensim/models/lsi_dispatcher.py @@ -15,7 +15,11 @@ from __future__ import with_statement -import os, sys, logging, threading, time +import os +import sys +import logging +import threading +import time from six import iteritems, itervalues try: from Queue import Queue @@ -37,8 +41,7 @@ # timeout for the Queue object put/get blocking methods. # it should really be infinity, but then keyboard interrupts don't work. # so this is really just a hack, see http://bugs.python.org/issue1360 -HUGE_TIMEOUT = 365 * 24 * 60 * 60 # one year - +HUGE_TIMEOUT = 365 * 24 * 60 * 60 # one year class Dispatcher(object): @@ -55,7 +58,7 @@ def __init__(self, maxsize=0): """ self.maxsize = maxsize self.workers = {} - self.callback = None # a pyro proxy to this object (unknown at init time, but will be set later) + self.callback = None # a pyro proxy to this object (unknown at init time, but will be set later) @Pyro4.expose def initialize(self, **model_params): @@ -71,7 +74,7 @@ def initialize(self, **model_params): # locate all available workers and store their proxies, for subsequent RMI calls self.workers = {} with utils.getNS() as ns: - self.callback = Pyro4.Proxy('PYRONAME:gensim.lsi_dispatcher') # = self + self.callback = Pyro4.Proxy('PYRONAME:gensim.lsi_dispatcher') # = self for name, uri in iteritems(ns.list(prefix='gensim.lsi_worker')): try: worker = Pyro4.Proxy(uri) @@ -115,7 +118,7 @@ def getstate(self): logger.info("end of input, assigning all remaining jobs") logger.debug("jobs done: %s, jobs received: %s" % (self._jobsdone, self._jobsreceived)) while self._jobsdone < self._jobsreceived: - time.sleep(0.5) # check every half a second + time.sleep(0.5) # check every half a second # TODO: merge in parallel, so that we're done in `log_2(workers)` merges, # and not `workers - 1` merges! 
@@ -156,14 +159,12 @@ def jobdone(self, workerid): self._jobsdone += 1 logger.info("worker #%s finished job #%i" % (workerid, self._jobsdone)) worker = self.workers[workerid] - worker.requestjob() # tell the worker to ask for another job, asynchronously (one-way) - + worker.requestjob() # tell the worker to ask for another job, asynchronously (one-way) def jobsdone(self): """Wrap self._jobsdone, needed for remote access through proxies""" return self._jobsdone - @Pyro4.oneway def exit(self): """ @@ -173,9 +174,8 @@ def exit(self): logger.info("terminating worker %s" % workerid) worker.exit() logger.info("terminating dispatcher") - os._exit(0) # exit the whole process (not just this thread ala sys.exit()) -#endclass Dispatcher - + os._exit(0) # exit the whole process (not just this thread ala sys.exit()) +# endclass Dispatcher def main(): @@ -197,6 +197,5 @@ def main(): logger.info("finished running %s" % program) - if __name__ == '__main__': main() diff --git a/gensim/models/lsi_worker.py b/gensim/models/lsi_worker.py index b9de939962..4cae372ffd 100755 --- a/gensim/models/lsi_worker.py +++ b/gensim/models/lsi_worker.py @@ -17,7 +17,9 @@ from __future__ import with_statement -import os, sys, logging +import os +import sys +import logging import threading import tempfile try: @@ -31,8 +33,7 @@ logger = logging.getLogger('gensim.models.lsi_worker') -SAVE_DEBUG = 0 # save intermediate models after every SAVE_DEBUG updates (0 for never) - +SAVE_DEBUG = 0 # save intermediate models after every SAVE_DEBUG updates (0 for never) class Worker(object): @@ -42,8 +43,8 @@ def __init__(self): @Pyro4.expose def initialize(self, myid, dispatcher, **model_params): self.lock_update = threading.Lock() - self.jobsdone = 0 # how many jobs has this worker completed? - self.myid = myid # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? + self.jobsdone = 0 # how many jobs has this worker completed? + self.myid = myid # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? 
self.dispatcher = dispatcher self.finished = False logger.info("initializing worker #%s" % myid) @@ -72,7 +73,6 @@ def requestjob(self): else: logger.info("worker #%i stopping asking for jobs" % self.myid) - @utils.synchronous('lock_update') def processjob(self, job): self.model.add_documents(job) @@ -97,17 +97,15 @@ def reset(self): self.model.projection = self.model.projection.empty_like() self.finished = False - @Pyro4.oneway def exit(self): logger.info("terminating worker #%i" % self.myid) os._exit(0) -#endclass Worker - +# endclass Worker def main(): - logging.basicConfig(format = '%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) logger.info("running %s" % " ".join(sys.argv)) program = os.path.basename(sys.argv[0]) @@ -121,6 +119,5 @@ def main(): logger.info("finished running %s" % program) - if __name__ == '__main__': main() diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index 8e326fdc6c..f9e16cd23b 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -218,7 +218,7 @@ def merge(self, other, decay=1.0): self.u[:, i] *= -1.0 # diff = np.dot(self.u.T, self.u) - np.eye(self.u.shape[1]) # logger.info('orth error after=%f' % np.sum(diff * diff)) -#endclass Projection +# endclass Projection class LsiModel(interfaces.TransformationABC, basemodel.BaseTopicModel): @@ -242,6 +242,7 @@ class LsiModel(interfaces.TransformationABC, basemodel.BaseTopicModel): .. [2] https://github.com/piskvorky/gensim/wiki/Recipes-&-FAQ#q4-how-do-you-output-the-u-s-vt-matrices-of-lsi """ + def __init__(self, corpus=None, num_topics=200, id2word=None, chunksize=20000, decay=1.0, distributed=False, onepass=True, power_iters=P2_EXTRA_ITERS, extra_samples=P2_EXTRA_DIMS): @@ -328,7 +329,6 @@ def __init__(self, corpus=None, num_topics=200, id2word=None, chunksize=20000, if corpus is not None: self.add_documents(corpus) - def add_documents(self, corpus, chunksize=None, decay=None): """ Update singular value decomposition to take into account a new @@ -581,7 +581,7 @@ def load(cls, fname, *args, **kwargs): except Exception as e: logging.warning("failed to load projection from %s: %s" % (projection_fname, e)) return result -#endclass LsiModel +# endclass LsiModel def print_debug(id2token, u, s, topics, num_words=10, num_neg=None): diff --git a/gensim/models/normmodel.py b/gensim/models/normmodel.py index 07bdcce650..a78dc604dc 100644 --- a/gensim/models/normmodel.py +++ b/gensim/models/normmodel.py @@ -29,6 +29,7 @@ class NormModel(interfaces.TransformationABC): Model persistency is achieved via its load/save methods """ + def __init__(self, corpus=None, norm='l2'): """ Compute the 'l1' or 'l2' normalization by normalizing separately @@ -72,4 +73,4 @@ def normalize(self, bow): def __getitem__(self, bow): return self.normalize(bow) -#endclass NormModel +# endclass NormModel diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index 33390fc08e..1f0826258c 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -107,6 +107,7 @@ class Phrases(interfaces.TransformationABC): and `phrases[corpus]` syntax. """ + def __init__(self, sentences=None, min_count=5, threshold=10.0, max_vocab_size=40000000, delimiter=b'_', progress_per=10000, scoring='default'): @@ -390,6 +391,7 @@ class Phraser(interfaces.TransformationABC): other values.) 
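# --- editor's sketch (illustrative addition, not part of the diff) ---
# The Phrases -> Phraser hand-off described in the docstring above: Phrases
# accumulates bigram statistics, while Phraser keeps only the state needed to
# apply them via __getitem__.
from gensim.models import Phrases
from gensim.models.phrases import Phraser

sentences = [['new', 'york', 'taxi'], ['new', 'york', 'subway'], ['paris', 'metro']] * 5
bigram = Phrases(sentences, min_count=1, threshold=0.5)
bigram_phraser = Phraser(bigram)
print(bigram_phraser[['new', 'york', 'city']])   # -> ['new_york', 'city'] once the pair's score clears the threshold
# --- end of sketch ---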
""" + def __init__(self, phrases_model): self.threshold = phrases_model.threshold self.min_count = phrases_model.min_count @@ -460,7 +462,7 @@ def __getitem__(self, sentence): sys.exit(1) infile = sys.argv[1] - from gensim.models import Phrases # for pickle + from gensim.models import Phrases # noqa:F811 for pickle from gensim.models.word2vec import Text8Corpus sentences = Text8Corpus(infile) diff --git a/gensim/models/rpmodel.py b/gensim/models/rpmodel.py index e4fef5acb4..1186a041a0 100644 --- a/gensim/models/rpmodel.py +++ b/gensim/models/rpmodel.py @@ -34,6 +34,7 @@ class RpModel(interfaces.TransformationABC): Model persistency is achieved via its load/save methods. """ + def __init__(self, corpus, id2word=None, num_topics=300): """ `id2word` is a mapping from word ids (integers) to words (strings). It is @@ -94,4 +95,4 @@ def __getitem__(self, bow): def __setstate__(self, state): self.__dict__ = state self.freshly_loaded = True -#endclass RpModel +# endclass RpModel diff --git a/gensim/models/tfidfmodel.py b/gensim/models/tfidfmodel.py index 1b4aea863b..4b5ba02e02 100644 --- a/gensim/models/tfidfmodel.py +++ b/gensim/models/tfidfmodel.py @@ -48,6 +48,7 @@ class TfidfModel(interfaces.TransformationABC): Model persistency is achieved via its load/save methods. """ + def __init__( self, corpus=None, id2word=None, dictionary=None, wlocal=utils.identity, wglobal=df2idf, normalize=True): @@ -101,11 +102,9 @@ def __init__( # be initialized in some other way pass - def __str__(self): return "TfidfModel(num_docs=%s, num_nnz=%s)" % (self.num_docs, self.num_nnz) - def initialize(self, corpus): """ Compute inverse document weights, which will be used to modify term @@ -133,7 +132,6 @@ def initialize(self, corpus): self.num_docs, n_features, self.num_nnz) self.idfs = precompute_idfs(self.wglobal, self.dfs, self.num_docs) - def __getitem__(self, bow, eps=1e-12): """ Return tf-idf representation of the input vector and/or corpus. 
@@ -160,4 +158,4 @@ def __getitem__(self, bow, eps=1e-12): # make sure there are no explicit zeroes in the vector (must be sparse) vector = [(termid, weight) for termid, weight in vector if abs(weight) > eps] return vector -#endclass TfidfModel +# endclass TfidfModel diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index a37de25158..255b9c553f 100644 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -115,17 +115,15 @@ from Queue import Queue, Empty from numpy import exp, log, dot, zeros, outer, random, dtype, float32 as REAL,\ - double, uint32, seterr, array, uint8, vstack, fromstring, sqrt, newaxis,\ - ndarray, empty, sum as np_sum, prod, ones, ascontiguousarray, vstack, logaddexp + uint32, seterr, array, uint8, vstack, fromstring, sqrt,\ + empty, sum as np_sum, ones, logaddexp from scipy.special import expit from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc -from gensim.corpora.dictionary import Dictionary from six import iteritems, itervalues, string_types from six.moves import xrange from types import GeneratorType -from scipy import stats logger = logging.getLogger(__name__) @@ -215,7 +213,7 @@ def score_sentence_sg(model, sentence, work=None): # now go over all words from the window, predicting each one in turn start = max(0, pos - model.window) - for pos2, word2 in enumerate(word_vocabs[start : pos + model.window + 1], start): + for pos2, word2 in enumerate(word_vocabs[start: pos + model.window + 1], start): # don't train on OOV words and on the `word` itself if word2 is not None and pos2 != pos: log_prob_sentence += score_sg_pair(model, word, word2) @@ -372,7 +370,6 @@ def score_cbow_pair(model, word, l1): return sum(lprob) - class Word2Vec(utils.SaveLoad): """ Class for training, using and evaluating neural networks described in https://code.google.com/p/word2vec/ @@ -503,10 +500,10 @@ def __init__( self.build_vocab(sentences, trim_rule=trim_rule) self.train(sentences, total_examples=self.corpus_count, epochs=self.iter, start_alpha=self.alpha, end_alpha=self.min_alpha) - else : - if trim_rule is not None : + else: + if trim_rule is not None: logger.warning("The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the model. ") - logger.warning("Model initialized without sentences. trim_rule provided, if any, will be ignored." ) + logger.warning("Model initialized without sentences. trim_rule provided, if any, will be ignored.") def initialize_word_vectors(self): self.wv = KeyedVectors() @@ -1140,12 +1137,12 @@ def update_weights(self): # randomize the remaining words for i in xrange(len(self.wv.syn0), len(self.wv.vocab)): # construct deterministic seed from word AND seed argument - newsyn0[i-len(self.wv.syn0)] = self.seeded_vector(self.wv.index2word[i] + str(self.seed)) + newsyn0[i - len(self.wv.syn0)] = self.seeded_vector(self.wv.index2word[i] + str(self.seed)) # Raise an error if an online update is run before initial training on a corpus if not len(self.wv.syn0): - raise RuntimeError("You cannot do an online vocabulary-update of a model which has no prior vocabulary. " \ - "First build the vocabulary of your model with a corpus " \ + raise RuntimeError("You cannot do an online vocabulary-update of a model which has no prior vocabulary. 
" + "First build the vocabulary of your model with a corpus " "before doing an online update.") self.wv.syn0 = vstack([self.wv.syn0, newsyn0]) @@ -1322,10 +1319,10 @@ def predict_output_word(self, context_words_list, topn=10): if word2_indices and self.cbow_mean: l1 /= len(word2_indices) - prob_values = exp(dot(l1, self.syn1neg.T)) # propagate hidden -> output and take softmax to get probabilities + prob_values = exp(dot(l1, self.syn1neg.T)) # propagate hidden -> output and take softmax to get probabilities prob_values /= sum(prob_values) top_indices = matutils.argsort(prob_values, topn=topn, reverse=True) - return [(self.wv.index2word[index1], prob_values[index1]) for index1 in top_indices] #returning the most probable output words with their probabilities + return [(self.wv.index2word[index1], prob_values[index1]) for index1 in top_indices] # returning the most probable output words with their probabilities def init_sims(self, replace=False): """ @@ -1377,7 +1374,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case def __str__(self): return "%s(vocab=%s, size=%s, alpha=%s)" % (self.__class__.__name__, len(self.wv.index2word), self.vector_size, self.alpha) - def _minimize_model(self, save_syn1 = False, save_syn1neg = False, save_syn0_lockf = False): + def _minimize_model(self, save_syn1=False, save_syn1neg=False, save_syn0_lockf=False): warnings.warn("This method would be deprecated in the future. Keep just_word_vectors = model.wv to retain just the KeyedVectors instance for read-only querying of word vectors.") if save_syn1 and save_syn1neg and save_syn0_lockf: return @@ -1459,6 +1456,7 @@ def get_latest_training_loss(self): class BrownCorpus(object): """Iterate over sentences from the Brown corpus (part of NLTK data).""" + def __init__(self, dirname): self.dirname = dirname @@ -1481,6 +1479,7 @@ def __iter__(self): class Text8Corpus(object): """Iterate over sentences from the "text8" corpus, unzipped from http://mattmahoney.net/dc/text8.zip .""" + def __init__(self, fname, max_sentence_length=MAX_WORDS_IN_BATCH): self.fname = fname self.max_sentence_length = max_sentence_length @@ -1541,7 +1540,7 @@ def __iter__(self): line = utils.to_unicode(line).split() i = 0 while i < len(line): - yield line[i : i + self.max_sentence_length] + yield line[i: i + self.max_sentence_length] i += self.max_sentence_length except AttributeError: # If it didn't work like a file, use it as a string filename @@ -1550,7 +1549,7 @@ def __iter__(self): line = utils.to_unicode(line).split() i = 0 while i < len(line): - yield line[i:i + self.max_sentence_length] + yield line[i: i + self.max_sentence_length] i += self.max_sentence_length @@ -1619,7 +1618,7 @@ def __iter__(self): print(globals()['__doc__'] % locals()) sys.exit(1) - from gensim.models.word2vec import Word2Vec # avoid referencing __main__ in pickle + from gensim.models.word2vec import Word2Vec # noqa:F811 avoid referencing __main__ in pickle seterr(all='raise') # don't ignore numpy errors diff --git a/gensim/models/wrappers/__init__.py b/gensim/models/wrappers/__init__.py index a833ae0c58..9cd14ea8e7 100644 --- a/gensim/models/wrappers/__init__.py +++ b/gensim/models/wrappers/__init__.py @@ -2,9 +2,9 @@ This package contains wrappers for other topic modeling programs. 
""" -from .ldamallet import LdaMallet -from .dtmmodel import DtmModel -from .ldavowpalwabbit import LdaVowpalWabbit -from .fasttext import FastText -from .wordrank import Wordrank -from .varembed import VarEmbed \ No newline at end of file +from .ldamallet import LdaMallet # noqa:F401 +from .dtmmodel import DtmModel # noqa:F401 +from .ldavowpalwabbit import LdaVowpalWabbit # noqa:F401 +from .fasttext import FastText # noqa:F401 +from .wordrank import Wordrank # noqa:F401 +from .varembed import VarEmbed # noqa:F401 diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index 5eff091417..bc02663e04 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -27,7 +27,6 @@ import os from subprocess import PIPE import numpy as np -import six from gensim import utils, corpora, matutils from gensim.utils import check_output @@ -89,7 +88,7 @@ def __init__( try: lencorpus = len(corpus) - except: + except Exception: logger.warning("input corpus stream has no len(); counting documents") lencorpus = sum(1 for _ in corpus) if lencorpus == 0: @@ -222,9 +221,9 @@ def train(self, corpus, time_slices, mode, model): self.obs_ = np.zeros((self.num_topics, self.num_terms * len(self.time_slices))) for t in range(self.num_topics): - topic = "%03d" % t - self.lambda_[t, :] = np.loadtxt(self.fout_prob().format(i=topic)) - self.obs_[t, :] = np.loadtxt(self.fout_observations().format(i=topic)) + topic = "%03d" % t + self.lambda_[t, :] = np.loadtxt(self.fout_prob().format(i=topic)) + self.obs_[t, :] = np.loadtxt(self.fout_observations().format(i=topic)) # cast to correct shape, lambda[5,10,0] is the proportion of the 10th # topic in doc 5 at time 0 self.lambda_.shape = (self.num_topics, self.num_terms, len(self.time_slices)) @@ -256,7 +255,7 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted else: num_topics = min(num_topics, self.num_topics) chosen_topics = range(num_topics) - # add a little random jitter, to randomize results around the same + # add a little random jitter, to randomize results around the same # alpha # sort_alpha = self.alpha + 0.0001 * \ # numpy.random.rand(len(self.alpha)) @@ -320,7 +319,7 @@ def dtm_vis(self, corpus, time): all of these are needed to visualise topics for DTM for a particular time-slice via pyLDAvis. input parameter is the year to do the visualisation. """ - topic_term = np.exp(self.lambda_[:,:,time]) / np.exp(self.lambda_[:,:,time]).sum() + topic_term = np.exp(self.lambda_[:, :, time]) / np.exp(self.lambda_[:, :, time]).sum() topic_term = topic_term * self.num_topics doc_topic = self.gamma_ @@ -339,7 +338,7 @@ def dtm_vis(self, corpus, time): def dtm_coherence(self, time, num_words=20): """ - returns all topics of a particular time-slice without probabilitiy values for it to be used + returns all topics of a particular time-slice without probabilitiy values for it to be used for either "u_mass" or "c_v" coherence. TODO: because of print format right now can only return for 1st time-slice. 
diff --git a/gensim/models/wrappers/fasttext.py b/gensim/models/wrappers/fasttext.py index 9f68d67ca0..839cb46633 100644 --- a/gensim/models/wrappers/fasttext.py +++ b/gensim/models/wrappers/fasttext.py @@ -31,6 +31,7 @@ import tempfile import os import struct +from six.moves import xrange import numpy as np from numpy import float32 as REAL, sqrt, newaxis @@ -38,8 +39,6 @@ from gensim.models.keyedvectors import KeyedVectors, Vocab from gensim.models.word2vec import Word2Vec -from six import string_types - logger = logging.getLogger(__name__) FASTTEXT_FILEFORMAT_MAGIC = 793712314 @@ -52,6 +51,7 @@ class FastTextKeyedVectors(KeyedVectors): Subclasses KeyedVectors to implement oov lookups, storing ngrams and other FastText specific methods """ + def __init__(self): super(FastTextKeyedVectors, self).__init__() self.syn0_all_norm = None @@ -90,7 +90,7 @@ def word_vec(self, word, use_norm=False): word_vec += ngram_weights[self.ngrams[ngram]] if word_vec.any(): return word_vec / len(ngrams) - else: # No ngrams of the word are present in self.ngrams + else: # No ngrams of the word are present in self.ngrams raise KeyError('all ngrams for word %s absent from model' % word) def init_sims(self, replace=False): @@ -212,7 +212,7 @@ def train(cls, ft_path, corpus_file, output_file=None, model='cbow', size=100, a cmd.append("-%s" % option) cmd.append(str(value)) - output = utils.check_output(args=cmd) + output = utils.check_output(args=cmd) # noqa:F841 model = cls.load_fasttext_format(output_file) cls.delete_training_files(output_file) return model @@ -390,7 +390,6 @@ def init_ngrams(self): @staticmethod def compute_ngrams(word, min_n, max_n): - ngram_indices = [] BOW, EOW = ('<', '>') # Used by FastText to attach to all words as prefix and suffix extended_word = BOW + word + EOW ngrams = [] @@ -414,4 +413,3 @@ def ft_hash(string): h = h * np.uint32(16777619) np.seterr(**old_settings) return h - diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py index c93af78a1a..a4e435810f 100644 --- a/gensim/models/wrappers/ldamallet.py +++ b/gensim/models/wrappers/ldamallet.py @@ -53,6 +53,7 @@ class LdaMallet(utils.SaveLoad, basemodel.BaseTopicModel): takes place by passing around data files on disk and calling Java with subprocess.call(). """ + def __init__(self, mallet_path, corpus=None, num_topics=100, alpha=50, id2word=None, workers=4, prefix=None, optimize_interval=0, iterations=1000, topic_threshold=0.0): """ @@ -84,7 +85,7 @@ def __init__(self, mallet_path, corpus=None, num_topics=100, alpha=50, id2word=N if self.num_terms == 0: raise ValueError("cannot compute LDA over an empty collection (no terms)") self.num_topics = num_topics - self.topic_threshold=topic_threshold + self.topic_threshold = topic_threshold self.alpha = alpha if prefix is None: rand_prefix = hex(random.randint(0, 0xffffff))[2:] + '_' @@ -193,7 +194,7 @@ def load_word_topics(self): _ = next(fin) # header self.alpha = numpy.array([float(val) for val in next(fin).split()[2:]]) assert len(self.alpha) == self.num_topics, "mismatch between MALLET vs. 
requested topics" - _ = next(fin) # beta + _ = next(fin) # noqa:F841 beta for lineno, line in enumerate(fin): line = utils.to_unicode(line) doc, source, pos, typeindex, token, topic = line.split(" ") @@ -233,9 +234,9 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): chosen_topics = range(num_topics) else: num_topics = min(num_topics, self.num_topics) - sort_alpha = self.alpha + 0.0001 * numpy.random.rand(len(self.alpha)) # add a little random jitter, to randomize results around the same alpha + sort_alpha = self.alpha + 0.0001 * numpy.random.rand(len(self.alpha)) # add a little random jitter, to randomize results around the same alpha sorted_topics = list(matutils.argsort(sort_alpha)) - chosen_topics = sorted_topics[:num_topics//2] + sorted_topics[-num_topics//2 : ] + chosen_topics = sorted_topics[: num_topics // 2] + sorted_topics[-num_topics // 2:] shown = [] for i in chosen_topics: if formatted: @@ -288,8 +289,6 @@ def get_version(self, direc_path): except Exception: return "Can't parse pom.xml version file" - - def read_doctopics(self, fname, eps=1e-6, renorm=True): """ Yield document topic vectors from MALLET's "doc-topics" format, as sparse gensim vectors. diff --git a/gensim/models/wrappers/ldavowpalwabbit.py b/gensim/models/wrappers/ldavowpalwabbit.py index 8fd0582bae..afa19c4327 100644 --- a/gensim/models/wrappers/ldavowpalwabbit.py +++ b/gensim/models/wrappers/ldavowpalwabbit.py @@ -76,6 +76,7 @@ class LdaVowpalWabbit(utils.SaveLoad): between Vowpal Wabbit and Python takes place by passing around data files on disk and calling the 'vw' binary with the subprocess module. """ + def __init__(self, vw_path, corpus=None, num_topics=100, id2word=None, chunksize=256, passes=1, alpha=0.1, eta=0.1, decay=0.5, offset=1, gamma_threshold=0.001, random_seed=None, @@ -197,7 +198,7 @@ def train(self, corpus): _run_vw_command(cmd) - # ensure that future updates of this model use correct offset + # ensure that future updates of this model use correct offset self.offset += corpus_size def update(self, corpus): @@ -216,7 +217,7 @@ def update(self, corpus): _run_vw_command(cmd) - # ensure that future updates of this model use correct offset + # ensure that future updates of this model use correct offset self.offset += corpus_size def log_perplexity(self, chunk): @@ -312,7 +313,7 @@ def load(cls, fname, *args, **kwargs): LOG.debug("Writing model bytes to '%s'", lda_vw._model_filename) with utils.smart_open(lda_vw._model_filename, 'wb') as fhandle: fhandle.write(lda_vw._model_data) - lda_vw._model_data = None # no need to keep in memory after this + lda_vw._model_data = None # no need to keep in memory after this if lda_vw._topics_data: LOG.debug("Writing topic bytes to '%s'", lda_vw._topics_filename) @@ -336,11 +337,11 @@ def _init_temp_dir(self, prefix='tmp'): def _get_vw_predict_command(self, corpus_size): """Get list of command line arguments for running prediction.""" cmd = [self.vw_path, - '--testonly', # don't update model with this data + '--testonly', # don't update model with this data '--lda_D', str(corpus_size), - '-i', self._model_filename, # load existing binary model + '-i', self._model_filename, # load existing binary model '-d', self._corpus_filename, - '--learning_rate', '0', # possibly not needed, but harmless + '--learning_rate', '0', # possibly not needed, but harmless '-p', self._predict_filename] if self.random_seed is not None: @@ -364,7 +365,7 @@ def _get_vw_train_command(self, corpus_size, update=False): '--cache_file', self._cache_filename, 
'--lda_epsilon', str(self.gamma_threshold), '--readable_model', self._topics_filename, - '-k', # clear cache + '-k', # clear cache '-f', self._model_filename] if update: @@ -570,6 +571,7 @@ def _bit_length(num): """Return number of bits needed to encode given number.""" return len(bin(num).lstrip('-0b')) + def vwmodel2ldamodel(vw_model, iterations=50): """ Function to convert vowpal wabbit model to gensim LdaModel. This works by diff --git a/gensim/models/wrappers/varembed.py b/gensim/models/wrappers/varembed.py index 6012be2233..eab1e0217c 100644 --- a/gensim/models/wrappers/varembed.py +++ b/gensim/models/wrappers/varembed.py @@ -63,7 +63,7 @@ def load_varembed_format(cls, vectors, morfessor_model=None): morpho_embeddings = D['morpheme_embeddings'] result.load_word_embeddings(word_embeddings, word_to_ix) if morfessor_model: - if sys.version_info >= (2, 7): #Morfessor is only supported for Python 2.7 and above. + if sys.version_info >= (2, 7): # Morfessor is only supported for Python 2.7 and above. try: import morfessor morfessor_model = morfessor.MorfessorIO().read_binary_model_file(morfessor_model) @@ -90,7 +90,7 @@ def load_word_embeddings(self, word_embeddings, word_to_ix): self.vocab_size = len(counts) self.vector_size = word_embeddings.shape[1] self.syn0 = np.zeros((self.vocab_size, self.vector_size)) - self.index2word = [None]*self.vocab_size + self.index2word = [None] * self.vocab_size logger.info("Corpus has %i words", len(self.vocab)) for word_id, word in enumerate(counts): self.vocab[word] = Vocab(index=word_id, count=counts[word]) @@ -99,7 +99,6 @@ def load_word_embeddings(self, word_embeddings, word_to_ix): assert((len(self.vocab), self.vector_size) == self.syn0.shape) logger.info("Loaded matrix of %d size and %d dimensions", self.vocab_size, self.vector_size) - def add_morphemes_to_embeddings(self, morfessor_model, morpho_embeddings, morpho_to_ix): """ Method to include morpheme embeddings into varembed vectors Allowed only in Python versions 2.7 and above. @@ -109,4 +108,3 @@ def add_morphemes_to_embeddings(self, morfessor_model, morpho_embeddings, morpho [morpho_embeddings[morpho_to_ix.get(m, -1)] for m in morfessor_model.viterbi_segment(word)[0]]).sum(axis=0) self.syn0[self.vocab[word].index] += morpheme_embedding logger.info("Added morphemes to word vectors") - diff --git a/gensim/models/wrappers/wordrank.py b/gensim/models/wrappers/wordrank.py index 38ab25f92b..8426af1d82 100644 --- a/gensim/models/wrappers/wordrank.py +++ b/gensim/models/wrappers/wordrank.py @@ -21,17 +21,13 @@ import logging import os -import sys import copy import multiprocessing -import numpy as np - from gensim import utils from gensim.models.keyedvectors import KeyedVectors from gensim.scripts.glove2word2vec import glove2word2vec -from six import string_types from smart_open import smart_open from shutil import copyfile, rmtree @@ -45,7 +41,7 @@ class Wordrank(KeyedVectors): takes place by working with data files on disk and calling the Wordrank binary and glove's helper binaries (for preparing training data) with subprocess module. 
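# --- editor's sketch (illustrative addition, not part of the diff) ---
# Loading VarEmbed vectors through the wrapper touched above. The pickle path is
# hypothetical; passing a `morfessor_model` additionally requires Python >= 2.7
# and the morfessor package, per the version check above. Since the wrapper fills
# vocab/syn0/index2word, KeyedVectors-style queries should work on the result.
from gensim.models.wrappers import VarEmbed

varembed_vectors = 'varembed_vectors.pkl'   # hypothetical path to a VarEmbed output file
model = VarEmbed.load_varembed_format(vectors=varembed_vectors)
print(model.most_similar('language', topn=5))
# --- end of sketch ---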
""" - + @classmethod def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, min_count=5, max_vocab_size=0, sgd_num=100, lrate=0.001, period=10, iter=90, epsilon=0.75, dump_period=10, reg=0, alpha=100, @@ -126,7 +122,7 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, with smart_open(meta_file, 'wb') as f: meta_info = "{0} {1}\n{2} {3}\n{4} {5}".format(numwords, numwords, numlines, cooccurrence_shuf_file.split('/')[-1], numwords, vocab_file.split('/')[-1]) f.write(meta_info.encode('utf-8')) - + if iter % dump_period == 0: iter += 1 else: @@ -162,7 +158,7 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, cmd.append('--%s' % option) cmd.append(str(value)) logger.info("Running wordrank binary") - output = utils.check_output(args=cmd) + output = utils.check_output(args=cmd) # noqa:F841 # use embeddings from max. iteration's dump max_iter_dump = iter - (iter % dump_period) @@ -176,7 +172,7 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, @classmethod def load_wordrank_model(cls, model_file, vocab_file=None, context_file=None, sorted_vocab=1, ensemble=1): - glove2word2vec(model_file, model_file+'.w2vformat') + glove2word2vec(model_file, model_file + '.w2vformat') model = cls.load_word2vec_format('%s.w2vformat' % model_file) if ensemble and context_file: model.ensemble_embedding(model_file, context_file) @@ -209,7 +205,7 @@ def sort_embeddings(self, vocab_file): def ensemble_embedding(self, word_embedding, context_embedding): """Replace syn0 with the sum of context and word embeddings.""" - glove2word2vec(context_embedding, context_embedding+'.w2vformat') + glove2word2vec(context_embedding, context_embedding + '.w2vformat') w_emb = KeyedVectors.load_word2vec_format('%s.w2vformat' % word_embedding) c_emb = KeyedVectors.load_word2vec_format('%s.w2vformat' % context_embedding) # compare vocab words using keys of dict vocab @@ -223,4 +219,3 @@ def ensemble_embedding(self, word_embedding, context_embedding): new_emb = w_emb.syn0 + c_emb.syn0 self.syn0 = new_emb return new_emb - diff --git a/gensim/nosy.py b/gensim/nosy.py index 3536965b68..2913e1e694 100644 --- a/gensim/nosy.py +++ b/gensim/nosy.py @@ -24,7 +24,7 @@ EXTENSIONS = ['*.py'] EXECUTABLE = 'nosetests test/' -DEFAULTARGS = '--with-color -exe'# -w tests' +DEFAULTARGS = '--with-color -exe' # -w tests' def checkSum(): @@ -39,6 +39,7 @@ def checkSum(): val += stats[stat.ST_SIZE] + stats[stat.ST_MTIME] return val + if __name__ == '__main__': val = 0 try: diff --git a/gensim/parsing/__init__.py b/gensim/parsing/__init__.py index 2af45d7477..5dcc010aec 100644 --- a/gensim/parsing/__init__.py +++ b/gensim/parsing/__init__.py @@ -3,5 +3,9 @@ """ # bring model classes directly into package namespace, to save some typing -from .porter import PorterStemmer -from .preprocessing import * +from .porter import PorterStemmer # noqa:F401 +from .preprocessing import (remove_stopwords, strip_punctuation, strip_punctuation2, # noqa:F401 + strip_tags, strip_short, strip_numeric, + strip_non_alphanum, strip_multiple_whitespaces, + split_alphanum, stem_text, preprocess_string, + preprocess_documents, read_file, read_files) diff --git a/gensim/parsing/porter.py b/gensim/parsing/porter.py index 539271f58c..a22b8b94d1 100644 --- a/gensim/parsing/porter.py +++ b/gensim/parsing/porter.py @@ -103,7 +103,7 @@ def _vowelinstem(self): def _doublec(self, j): """True <=> j,(j-1) contain a double consonant.""" - return j > 0 and self.b[j] == 
self.b[j-1] and self._cons(j) + return j > 0 and self.b[j] == self.b[j - 1] and self._cons(j) def _cvc(self, i): """True <=> i-2,i-1,i has the form consonant - vowel - consonant @@ -113,25 +113,25 @@ def _cvc(self, i): cav(e), lov(e), hop(e), crim(e), but snow, box, tray. """ - if i < 2 or not self._cons(i) or self._cons(i-1) or not self._cons(i-2): + if i < 2 or not self._cons(i) or self._cons(i - 1) or not self._cons(i - 2): return False return self.b[i] not in "wxy" def _ends(self, s): """True <=> 0,...k ends with the string s.""" - if s[-1] != self.b[self.k]: # tiny speed-up + if s[-1] != self.b[self.k]: # tiny speed-up return 0 length = len(s) if length > (self.k + 1): return 0 - if self.b[self.k-length+1:self.k+1] != s: + if self.b[self.k - length + 1:self.k + 1] != s: return 0 self.j = self.k - length return 1 def _setto(self, s): """Set (j+1),...k to the characters in the string s, adjusting k.""" - self.b = self.b[:self.j+1] + s + self.b = self.b[:self.j + 1] + s self.k = len(self.b) - 1 def _r(self, s): @@ -171,9 +171,12 @@ def _step1ab(self): self.k -= 1 elif (self._ends("ed") or self._ends("ing")) and self._vowelinstem(): self.k = self.j - if self._ends("at"): self._setto("ate") - elif self._ends("bl"): self._setto("ble") - elif self._ends("iz"): self._setto("ize") + if self._ends("at"): + self._setto("ate") + elif self._ends("bl"): + self._setto("ble") + elif self._ends("iz"): + self._setto("ize") elif self._doublec(self.k): if self.b[self.k - 1] not in "lsz": self.k -= 1 @@ -193,87 +196,133 @@ def _step2(self): """ ch = self.b[self.k - 1] if ch == 'a': - if self._ends("ational"): self._r("ate") - elif self._ends("tional"): self._r("tion") + if self._ends("ational"): + self._r("ate") + elif self._ends("tional"): + self._r("tion") elif ch == 'c': - if self._ends("enci"): self._r("ence") - elif self._ends("anci"): self._r("ance") + if self._ends("enci"): + self._r("ence") + elif self._ends("anci"): + self._r("ance") elif ch == 'e': - if self._ends("izer"): self._r("ize") + if self._ends("izer"): + self._r("ize") elif ch == 'l': - if self._ends("bli"): self._r("ble") # --DEPARTURE-- + if self._ends("bli"): + self._r("ble") # --DEPARTURE-- # To match the published algorithm, replace this phrase with # if self._ends("abli"): self._r("able") - elif self._ends("alli"): self._r("al") - elif self._ends("entli"): self._r("ent") - elif self._ends("eli"): self._r("e") - elif self._ends("ousli"): self._r("ous") + elif self._ends("alli"): + self._r("al") + elif self._ends("entli"): + self._r("ent") + elif self._ends("eli"): + self._r("e") + elif self._ends("ousli"): + self._r("ous") elif ch == 'o': - if self._ends("ization"): self._r("ize") - elif self._ends("ation"): self._r("ate") - elif self._ends("ator"): self._r("ate") + if self._ends("ization"): + self._r("ize") + elif self._ends("ation"): + self._r("ate") + elif self._ends("ator"): + self._r("ate") elif ch == 's': - if self._ends("alism"): self._r("al") - elif self._ends("iveness"): self._r("ive") - elif self._ends("fulness"): self._r("ful") - elif self._ends("ousness"): self._r("ous") + if self._ends("alism"): + self._r("al") + elif self._ends("iveness"): + self._r("ive") + elif self._ends("fulness"): + self._r("ful") + elif self._ends("ousness"): + self._r("ous") elif ch == 't': - if self._ends("aliti"): self._r("al") - elif self._ends("iviti"): self._r("ive") - elif self._ends("biliti"): self._r("ble") - elif ch == 'g': # --DEPARTURE-- - if self._ends("logi"): self._r("log") + if self._ends("aliti"): + self._r("al") + elif 
self._ends("iviti"): + self._r("ive") + elif self._ends("biliti"): + self._r("ble") + elif ch == 'g': # --DEPARTURE-- + if self._ends("logi"): + self._r("log") # To match the published algorithm, delete this phrase def _step3(self): """Deal with -ic-, -full, -ness etc. Similar strategy to _step2.""" ch = self.b[self.k] if ch == 'e': - if self._ends("icate"): self._r("ic") - elif self._ends("ative"): self._r("") - elif self._ends("alize"): self._r("al") + if self._ends("icate"): + self._r("ic") + elif self._ends("ative"): + self._r("") + elif self._ends("alize"): + self._r("al") elif ch == 'i': - if self._ends("iciti"): self._r("ic") + if self._ends("iciti"): + self._r("ic") elif ch == 'l': - if self._ends("ical"): self._r("ic") - elif self._ends("ful"): self._r("") + if self._ends("ical"): + self._r("ic") + elif self._ends("ful"): + self._r("") elif ch == 's': - if self._ends("ness"): self._r("") + if self._ends("ness"): + self._r("") def _step4(self): """_step4() takes off -ant, -ence etc., in context vcvc.""" ch = self.b[self.k - 1] if ch == 'a': - if not self._ends("al"): return + if not self._ends("al"): + return elif ch == 'c': - if not self._ends("ance") and not self._ends("ence"): return + if not self._ends("ance") and not self._ends("ence"): + return elif ch == 'e': - if not self._ends("er"): return + if not self._ends("er"): + return elif ch == 'i': - if not self._ends("ic"): return + if not self._ends("ic"): + return elif ch == 'l': - if not self._ends("able") and not self._ends("ible"): return + if not self._ends("able") and not self._ends("ible"): + return elif ch == 'n': - if self._ends("ant"): pass - elif self._ends("ement"): pass - elif self._ends("ment"): pass - elif self._ends("ent"): pass - else: return + if self._ends("ant"): + pass + elif self._ends("ement"): + pass + elif self._ends("ment"): + pass + elif self._ends("ent"): + pass + else: + return elif ch == 'o': - if self._ends("ion") and self.b[self.j] in "st": pass - elif self._ends("ou"): pass + if self._ends("ion") and self.b[self.j] in "st": + pass + elif self._ends("ou"): + pass # takes care of -ous - else: return + else: + return elif ch == 's': - if not self._ends("ism"): return + if not self._ends("ism"): + return elif ch == 't': - if not self._ends("ate") and not self._ends("iti"): return + if not self._ends("ate") and not self._ends("iti"): + return elif ch == 'u': - if not self._ends("ous"): return + if not self._ends("ous"): + return elif ch == 'v': - if not self._ends("ive"): return + if not self._ends("ive"): + return elif ch == 'z': - if not self._ends("ize"): return + if not self._ends("ize"): + return else: return if self._m() > 1: @@ -295,7 +344,7 @@ def stem(self, w): w = w.lower() k = len(w) - 1 if k <= 1: - return w # --DEPARTURE-- + return w # --DEPARTURE-- # With this line, strings of length 1 or 2 don't go through the # stemming process, although no mention is made of this in the @@ -311,7 +360,7 @@ def stem(self, w): self._step3() self._step4() self._step5() - return self.b[:self.k+1] + return self.b[:self.k + 1] def stem_sentence(self, txt): return " ".join(map(self.stem, txt.split())) diff --git a/gensim/parsing/preprocessing.py b/gensim/parsing/preprocessing.py index 367f0b02ad..a92eb98656 100644 --- a/gensim/parsing/preprocessing.py +++ b/gensim/parsing/preprocessing.py @@ -45,6 +45,8 @@ def remove_stopwords(s): RE_PUNCT = re.compile('([%s])+' % re.escape(string.punctuation), re.UNICODE) + + def strip_punctuation(s): s = utils.to_unicode(s) return RE_PUNCT.sub(" ", s) @@ -58,9 +60,11 @@ 
def strip_punctuation(s): RE_TAGS = re.compile(r"<([^>]+)>", re.UNICODE) + + def strip_tags(s): s = utils.to_unicode(s) - return RE_TAGS.sub("",s) + return RE_TAGS.sub("", s) def strip_short(s, minsize=3): @@ -69,18 +73,24 @@ def strip_short(s, minsize=3): RE_NUMERIC = re.compile(r"[0-9]+", re.UNICODE) + + def strip_numeric(s): s = utils.to_unicode(s) return RE_NUMERIC.sub("", s) RE_NONALPHA = re.compile(r"\W", re.UNICODE) + + def strip_non_alphanum(s): s = utils.to_unicode(s) return RE_NONALPHA.sub(" ", s) RE_WHITESPACE = re.compile(r"(\s)+", re.UNICODE) + + def strip_multiple_whitespaces(s): s = utils.to_unicode(s) return RE_WHITESPACE.sub(" ", s) @@ -88,6 +98,8 @@ def strip_multiple_whitespaces(s): RE_AL_NUM = re.compile(r"([a-z]+)([0-9]+)", flags=re.UNICODE) RE_NUM_AL = re.compile(r"([0-9]+)([a-z]+)", flags=re.UNICODE) + + def split_alphanum(s): s = utils.to_unicode(s) s = RE_AL_NUM.sub(r"\1 \2", s) @@ -101,6 +113,8 @@ def stem_text(text): text = utils.to_unicode(text) p = PorterStemmer() return ' '.join(p.stem(word) for word in text.split()) + + stem = stem_text DEFAULT_FILTERS = [lambda x: x.lower(), strip_tags, strip_punctuation, strip_multiple_whitespaces, diff --git a/gensim/scripts/glove2word2vec.py b/gensim/scripts/glove2word2vec.py index 8d3d1cb02f..4f13de4524 100644 --- a/gensim/scripts/glove2word2vec.py +++ b/gensim/scripts/glove2word2vec.py @@ -19,11 +19,9 @@ import os import sys -import random import logging import argparse -import gensim from smart_open import smart_open logger = logging.getLogger(__name__) @@ -73,4 +71,3 @@ def glove2word2vec(glove_input_file, word2vec_output_file): # do the actual conversion num_lines, num_dims = glove2word2vec(args.input, args.output) logger.info('Converted model with %i vectors and %i dimensions', num_lines, num_dims) - diff --git a/gensim/scripts/make_wiki_online.py b/gensim/scripts/make_wiki_online.py index 26ca5d83ff..66985a566e 100755 --- a/gensim/scripts/make_wiki_online.py +++ b/gensim/scripts/make_wiki_online.py @@ -76,20 +76,20 @@ if online: dictionary = HashDictionary(id_range=keep_words, debug=debug) - dictionary.allow_update = True # start collecting document frequencies + dictionary.allow_update = True # start collecting document frequencies wiki = WikiCorpus(inp, lemmatize=lemmatize, dictionary=dictionary) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) # with HashDictionary, the token->id mapping is only fully instantiated now, after `serialize` dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) dictionary.save_as_text(outp + '_wordids.txt.bz2') wiki.save(outp + '_corpus.pkl.bz2') dictionary.allow_update = False else: - wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) + wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) # only keep the most frequent words (out of total ~8.2m unique tokens) wiki.dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) # save dictionary and bag-of-words (term-document frequency matrix) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h wiki.dictionary.save_as_text(outp + '_wordids.txt.bz2') 
# load back the id->word mapping directly from file # this seems to save more memory, compared to keeping the wiki.dictionary object from above diff --git a/gensim/scripts/make_wiki_online_lemma.py b/gensim/scripts/make_wiki_online_lemma.py index 26ca5d83ff..66985a566e 100755 --- a/gensim/scripts/make_wiki_online_lemma.py +++ b/gensim/scripts/make_wiki_online_lemma.py @@ -76,20 +76,20 @@ if online: dictionary = HashDictionary(id_range=keep_words, debug=debug) - dictionary.allow_update = True # start collecting document frequencies + dictionary.allow_update = True # start collecting document frequencies wiki = WikiCorpus(inp, lemmatize=lemmatize, dictionary=dictionary) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) # with HashDictionary, the token->id mapping is only fully instantiated now, after `serialize` dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) dictionary.save_as_text(outp + '_wordids.txt.bz2') wiki.save(outp + '_corpus.pkl.bz2') dictionary.allow_update = False else: - wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) + wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) # only keep the most frequent words (out of total ~8.2m unique tokens) wiki.dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) # save dictionary and bag-of-words (term-document frequency matrix) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h wiki.dictionary.save_as_text(outp + '_wordids.txt.bz2') # load back the id->word mapping directly from file # this seems to save more memory, compared to keeping the wiki.dictionary object from above diff --git a/gensim/scripts/make_wiki_online_nodebug.py b/gensim/scripts/make_wiki_online_nodebug.py index 26ca5d83ff..66985a566e 100755 --- a/gensim/scripts/make_wiki_online_nodebug.py +++ b/gensim/scripts/make_wiki_online_nodebug.py @@ -76,20 +76,20 @@ if online: dictionary = HashDictionary(id_range=keep_words, debug=debug) - dictionary.allow_update = True # start collecting document frequencies + dictionary.allow_update = True # start collecting document frequencies wiki = WikiCorpus(inp, lemmatize=lemmatize, dictionary=dictionary) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) # with HashDictionary, the token->id mapping is only fully instantiated now, after `serialize` dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) dictionary.save_as_text(outp + '_wordids.txt.bz2') wiki.save(outp + '_corpus.pkl.bz2') dictionary.allow_update = False else: - wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) + wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) # only keep the most frequent words (out of total ~8.2m unique tokens) wiki.dictionary.filter_extremes(no_below=20, no_above=0.1, 
keep_n=DEFAULT_DICT_SIZE) # save dictionary and bag-of-words (term-document frequency matrix) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h wiki.dictionary.save_as_text(outp + '_wordids.txt.bz2') # load back the id->word mapping directly from file # this seems to save more memory, compared to keeping the wiki.dictionary object from above diff --git a/gensim/scripts/make_wikicorpus.py b/gensim/scripts/make_wikicorpus.py index 26ca5d83ff..66985a566e 100755 --- a/gensim/scripts/make_wikicorpus.py +++ b/gensim/scripts/make_wikicorpus.py @@ -76,20 +76,20 @@ if online: dictionary = HashDictionary(id_range=keep_words, debug=debug) - dictionary.allow_update = True # start collecting document frequencies + dictionary.allow_update = True # start collecting document frequencies wiki = WikiCorpus(inp, lemmatize=lemmatize, dictionary=dictionary) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # ~4h on my macbook pro without lemmatization, 3.1m articles (august 2012) # with HashDictionary, the token->id mapping is only fully instantiated now, after `serialize` dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) dictionary.save_as_text(outp + '_wordids.txt.bz2') wiki.save(outp + '_corpus.pkl.bz2') dictionary.allow_update = False else: - wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) + wiki = WikiCorpus(inp, lemmatize=lemmatize) # takes about 9h on a macbook pro, for 3.5m articles (june 2011) # only keep the most frequent words (out of total ~8.2m unique tokens) wiki.dictionary.filter_extremes(no_below=20, no_above=0.1, keep_n=DEFAULT_DICT_SIZE) # save dictionary and bag-of-words (term-document frequency matrix) - MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h + MmCorpus.serialize(outp + '_bow.mm', wiki, progress_cnt=10000) # another ~9h wiki.dictionary.save_as_text(outp + '_wordids.txt.bz2') # load back the id->word mapping directly from file # this seems to save more memory, compared to keeping the wiki.dictionary object from above diff --git a/gensim/scripts/word2vec2tensor.py b/gensim/scripts/word2vec2tensor.py index ce2866c5ae..88cab79d25 100644 --- a/gensim/scripts/word2vec2tensor.py +++ b/gensim/scripts/word2vec2tensor.py @@ -15,8 +15,8 @@ The script will create two TSV files. A 2d tensor format file, and a Word Embedding metadata file. Both files will us the --output file name as prefix This script is used to convert the word2vec format to Tensorflow 2D tensor and metadata formats for Embedding Visualization -To use the generated TSV 2D tensor and metadata file in the Projector Visualizer, please -1) Open http://projector.tensorflow.org/. +To use the generated TSV 2D tensor and metadata file in the Projector Visualizer, please +1) Open http://projector.tensorflow.org/. 2) Choose "Load Data" from the left menu. 3) Select "Choose file" in "Load a TSV file of vectors." and choose you local "_tensor.tsv" file 4) Select "Choose file" in "Load a TSV file of metadata." 
and choose you local "_metadata.tsv" file @@ -28,7 +28,6 @@ import os import sys -import random import logging import argparse @@ -36,28 +35,30 @@ logger = logging.getLogger(__name__) -def word2vec2tensor(word2vec_model_path,tensor_filename, binary=False): + +def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False): ''' Convert Word2Vec mode to 2D tensor TSV file and metadata file Args: param1 (str): word2vec model file path param2 (str): filename prefix param2 (bool): set True to use a binary Word2Vec model, defaults to False - ''' + ''' model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=binary) outfiletsv = tensor_filename + '_tensor.tsv' outfiletsvmeta = tensor_filename + '_metadata.tsv' - + with open(outfiletsv, 'w+') as file_vector: with open(outfiletsvmeta, 'w+') as file_metadata: for word in model.index2word: file_metadata.write(gensim.utils.to_utf8(word) + gensim.utils.to_utf8('\n')) vector_row = '\t'.join(map(str, model[word])) file_vector.write(vector_row + '\n') - + logger.info("2D tensor file saved to %s" % outfiletsv) logger.info("Tensor metadata file saved to %s" % outfiletsvmeta) + if __name__ == "__main__": logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', level=logging.INFO) logging.root.setLevel(level=logging.INFO) @@ -76,8 +77,8 @@ def word2vec2tensor(word2vec_model_path,tensor_filename, binary=False): parser.add_argument( "-o", "--output", required=True, help="Output tensor file name prefix") - parser.add_argument( "-b", "--binary", - required=False, + parser.add_argument("-b", "--binary", + required=False, help="If word2vec model in binary format, set True, else False") args = parser.parse_args() diff --git a/gensim/scripts/word2vec_standalone.py b/gensim/scripts/word2vec_standalone.py index a8ee58645a..52baea6f4c 100644 --- a/gensim/scripts/word2vec_standalone.py +++ b/gensim/scripts/word2vec_standalone.py @@ -55,10 +55,10 @@ import argparse from numpy import seterr -logger = logging.getLogger(__name__) - from gensim.models.word2vec import Word2Vec, LineSentence # avoid referencing __main__ in pickle +logger = logging.getLogger(__name__) + if __name__ == "__main__": logging.basicConfig( diff --git a/gensim/similarities/__init__.py b/gensim/similarities/__init__.py index 9f9e03ee11..48915d89c9 100644 --- a/gensim/similarities/__init__.py +++ b/gensim/similarities/__init__.py @@ -3,4 +3,4 @@ """ # bring classes directly into package namespace, to save some typing -from .docsim import Similarity, MatrixSimilarity, SparseMatrixSimilarity, WmdSimilarity +from .docsim import Similarity, MatrixSimilarity, SparseMatrixSimilarity, WmdSimilarity # noqa:F401 diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index 2a1a9512ea..efe71159d3 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -82,6 +82,7 @@ class Shard(utils.SaveLoad): request (query). """ + def __init__(self, fname, index): self.dirname, self.fname = os.path.split(fname) self.length = len(index) @@ -126,7 +127,7 @@ def __getitem__(self, query): try: index.num_best = self.num_best index.normalize = self.normalize - except: + except Exception: raise ValueError("num_best and normalize have to be set before querying a proxy Shard object") return index[query] @@ -149,6 +150,7 @@ class Similarity(interfaces.SimilarityABC): The shards themselves are simply stored as files to disk and mmap'ed back as needed. 
""" + def __init__(self, output_prefix, corpus, num_features, num_best=None, chunksize=256, shardsize=32768, norm='l2'): """ Construct the index from `corpus`. The index can be later extended by calling @@ -456,7 +458,7 @@ def destroy(self): for fname in glob.glob(self.output_prefix + '*'): logger.info("deleting %s", fname) os.remove(fname) -#endclass Similarity +# endclass Similarity class MatrixSimilarity(interfaces.SimilarityABC): @@ -473,6 +475,7 @@ class MatrixSimilarity(interfaces.SimilarityABC): See also `Similarity` and `SparseMatrixSimilarity` in this module. """ + def __init__(self, corpus, num_best=None, dtype=numpy.float32, num_features=None, chunksize=256, corpus_len=None): """ `num_features` is the number of features in the corpus (will be determined @@ -550,7 +553,8 @@ def get_similarities(self, query): def __str__(self): return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.index.shape[1]) -#endclass MatrixSimilarity +# endclass MatrixSimilarity + class WmdSimilarity(interfaces.SimilarityABC): """ @@ -576,6 +580,7 @@ class WmdSimilarity(interfaces.SimilarityABC): >>> query = 'Very good, you should seat outdoor.' >>> sims = instance[query] """ + def __init__(self, corpus, w2v_model, num_best=None, normalize_w2v_and_replace=True, chunksize=256): """ corpus: List of lists of strings, as in gensim.models.word2vec. @@ -618,7 +623,7 @@ def get_similarities(self, query): # Compute similarity for each query. qresult = [self.w2v_model.wmdistance(document, query[qidx]) for document in self.corpus] qresult = numpy.array(qresult) - qresult = 1./(1.+qresult) # Similarity is the negative of the distance. + qresult = 1. / (1. + qresult) # Similarity is the negative of the distance. # Append single query result to list of all results. result.append(qresult) @@ -633,7 +638,8 @@ def get_similarities(self, query): def __str__(self): return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.w2v_model.wv.syn0.shape[1]) -#endclass WmdSimilarity +# endclass WmdSimilarity + class SparseMatrixSimilarity(interfaces.SimilarityABC): """ @@ -652,6 +658,7 @@ class SparseMatrixSimilarity(interfaces.SimilarityABC): See also `Similarity` and `MatrixSimilarity` in this module. """ + def __init__(self, corpus, num_features=None, num_terms=None, num_docs=None, num_nnz=None, num_best=None, chunksize=500, dtype=numpy.float32, maintain_sparsity=False): self.num_best = num_best @@ -729,4 +736,4 @@ def get_similarities(self, query): # otherwise, return a 2d matrix (#queries x #index) result = result.toarray().T return result -#endclass SparseMatrixSimilarity +# endclass SparseMatrixSimilarity diff --git a/gensim/similarities/index.py b/gensim/similarities/index.py index 05db85dfbb..d0ca879225 100644 --- a/gensim/similarities/index.py +++ b/gensim/similarities/index.py @@ -46,7 +46,7 @@ def save(self, fname, protocol=2): _pickle.dump(d, fout, protocol=protocol) def load(self, fname): - fname_dict = fname+'.d' + fname_dict = fname + '.d' if not (os.path.exists(fname) and os.path.exists(fname_dict)): raise IOError( "Can't find index files '%s' and '%s' - Unable to restore AnnoyIndexer state." 
% (fname, fname_dict)) @@ -62,8 +62,7 @@ def build_from_word2vec(self): """Build an Annoy index using word vectors from a Word2Vec model""" self.model.init_sims() - return self._build_from_model(self.model.wv.syn0norm, self.model.wv.index2word - , self.model.vector_size) + return self._build_from_model(self.model.wv.syn0norm, self.model.wv.index2word, self.model.vector_size) def build_from_doc2vec(self): """Build an Annoy index using document vectors from a Doc2Vec model""" diff --git a/gensim/summarization/__init__.py b/gensim/summarization/__init__.py index c7efb84d4a..6bca1f109a 100644 --- a/gensim/summarization/__init__.py +++ b/gensim/summarization/__init__.py @@ -1,4 +1,4 @@ # bring model classes directly into package namespace, to save some typing -from .summarizer import summarize, summarize_corpus -from .keywords import keywords +from .summarizer import summarize, summarize_corpus # noqa:F401 +from .keywords import keywords # noqa:F401 diff --git a/gensim/test/basetests.py b/gensim/test/basetests.py index 1d22d8b1a8..a22bfe2d30 100644 --- a/gensim/test/basetests.py +++ b/gensim/test/basetests.py @@ -17,14 +17,14 @@ def testPrintTopic(self): topics = self.model.show_topics(formatted=True) for topic_no, topic in topics: self.assertTrue(isinstance(topic_no, int)) - self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) + self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) # noqa:F821 def testPrintTopics(self): topics = self.model.print_topics() for topic_no, topic in topics: self.assertTrue(isinstance(topic_no, int)) - self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) + self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) # noqa:F821 def testShowTopic(self): topic = self.model.show_topic(1) diff --git a/gensim/test/simspeed.py b/gensim/test/simspeed.py index c3ced33a55..4c1ffcab6f 100755 --- a/gensim/test/simspeed.py +++ b/gensim/test/simspeed.py @@ -21,7 +21,6 @@ from time import time import numpy as np -import scipy.sparse import gensim @@ -66,7 +65,7 @@ sims.extend(sim) else: sims = [index_dense[vec] for vec in query] - assert len(sims) == len(query) # make sure we have one result for each query document + assert len(sims) == len(query) # make sure we have one result for each query document taken = time() - start queries = math.ceil(1.0 * len(query) / chunksize) logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % @@ -85,7 +84,7 @@ sims.extend(sim) else: sims = [index_sparse[vec] for vec in query] - assert len(sims) == len(query) # make sure we have one result for each query document + assert len(sims) == len(query) # make sure we have one result for each query document taken = time() - start queries = math.ceil(1.0 * len(query) / chunksize) logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % diff --git a/gensim/test/simspeed2.py b/gensim/test/simspeed2.py index 158fa3fb9e..334730a6f1 100755 --- a/gensim/test/simspeed2.py +++ b/gensim/test/simspeed2.py @@ -20,9 +20,6 @@ import math from time import time -import numpy as np -import scipy.sparse - import gensim diff --git a/gensim/test/svd_error.py b/gensim/test/svd_error.py index 33b20a017a..4f204c1147 100755 --- a/gensim/test/svd_error.py +++ b/gensim/test/svd_error.py @@ -21,7 +21,9 @@ from __future__ import print_function, with_statement import logging -import os, sys, time +import os +import sys +import time import bz2 import itertools @@ -36,22 +38,21 @@ # no SVDLIBC: install with `easy_install sparsesvd` if you want 
SVDLIBC results as well sparsesvd = None -sparsesvd = None # don't use SVDLIBC +sparsesvd = None # don't use SVDLIBC -FACTORS = [300] # which num_topics to try -CHUNKSIZE = [10000, 1000] # which chunksize to try -POWER_ITERS = [0, 1, 2, 4, 6] # extra power iterations for the randomized algo +FACTORS = [300] # which num_topics to try +CHUNKSIZE = [10000, 1000] # which chunksize to try +POWER_ITERS = [0, 1, 2, 4, 6] # extra power iterations for the randomized algo # when reporting reconstruction error, also report spectral norm error? (very slow) COMPUTE_NORM2 = False - def norm2(a): """Spectral norm ("norm 2") of a symmetric matrix `a`.""" if COMPUTE_NORM2: logging.info("computing spectral norm of a %s matrix" % str(a.shape)) - return scipy.linalg.eigvalsh(a).max() # much faster than np.linalg.norm(2) + return scipy.linalg.eigvalsh(a).max() # much faster than np.linalg.norm(2) else: return np.nan @@ -64,7 +65,7 @@ def print_error(name, aat, u, s, ideal_nf, ideal_n2): err = -np.dot(u, np.dot(np.diag(s), u.T)) err += aat nf, n2 = np.linalg.norm(err), norm2(err) - print ('%s error: norm_frobenius=%f (/ideal=%g), norm2=%f (/ideal=%g), RMSE=%g' % + print('%s error: norm_frobenius=%f (/ideal=%g), norm2=%f (/ideal=%g), RMSE=%g' % (name, nf, nf / ideal_nf, n2, n2 / ideal_n2, rmse(err))) sys.stdout.flush() @@ -79,7 +80,6 @@ def __iter__(self): yield [(f, w) for f, w in doc if f < self.max_terms] - if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) logging.info("running %s" % " ".join(sys.argv)) @@ -109,7 +109,7 @@ def __iter__(self): corpus = ClippedCorpus(mm, n, m) id2word = gensim.utils.FakeDict(m) - logging.info("computing corpus * corpus^T") # eigenvalues of this matrix are singular values of `corpus`, squared + logging.info("computing corpus * corpus^T") # eigenvalues of this matrix are singular values of `corpus`, squared aat = np.zeros((m, m), dtype=np.float64) for chunk in gensim.utils.grouper(corpus, chunksize=5000): num_nnz = sum(len(doc) for doc in chunk) @@ -122,11 +122,10 @@ def __iter__(self): logging.info("computing full decomposition of corpus * corpus^t") aat = aat.astype(np.float32) spectrum_s, spectrum_u = scipy.linalg.eigh(aat) - spectrum_s = spectrum_s[::-1] # re-order to descending eigenvalue order + spectrum_s = spectrum_s[::-1] # re-order to descending eigenvalue order spectrum_u = spectrum_u.T[::-1].T np.save(fname + '.spectrum.npy', spectrum_s) - for factors in FACTORS: err = -np.dot(spectrum_u[:, :factors], np.dot(np.diag(spectrum_s[:factors]), spectrum_u[:, :factors].T)) err += aat @@ -145,7 +144,7 @@ def __iter__(self): taken = time.time() - taken del corpus_ram del vt - u, s = ut.T.astype(np.float32), s.astype(np.float32)**2 # convert singular values to eigenvalues + u, s = ut.T.astype(np.float32), s.astype(np.float32)**2 # convert singular values to eigenvalues del ut print("SVDLIBC SVD for %i factors took %s s (spectrum %f .. %f)" % (factors, taken, s[0], s[-1])) @@ -162,7 +161,7 @@ def __iter__(self): taken = time.time() - taken u, s = model.projection.u.astype(np.float32), model.projection.s.astype(np.float32)**2 del model - print ("incremental SVD for %i factors, %i power iterations, chunksize %i took %s s (spectrum %f .. %f)" % + print("incremental SVD for %i factors, %i power iterations, chunksize %i took %s s (spectrum %f .. 
%f)" % (factors, power_iters, chunksize, taken, s[0], s[-1])) print_error('incremental SVD', aat, u, s, ideal_fro, ideal_n2) del u @@ -174,7 +173,7 @@ def __iter__(self): taken = time.time() - taken u, s = model.projection.u.astype(np.float32), model.projection.s.astype(np.float32)**2 del model - print ("multipass SVD for %i factors, %i power iterations took %s s (spectrum %f .. %f)" % + print("multipass SVD for %i factors, %i power iterations took %s s (spectrum %f .. %f)" % (factors, power_iters, taken, s[0], s[-1])) print_error('multipass SVD', aat, u, s, ideal_fro, ideal_n2) del u diff --git a/gensim/test/test_aggregation.py b/gensim/test/test_aggregation.py index 44e3d16f65..5f09c30ccd 100644 --- a/gensim/test/test_aggregation.py +++ b/gensim/test/test_aggregation.py @@ -13,6 +13,7 @@ from gensim.topic_coherence import aggregation + class TestAggregation(unittest.TestCase): def setUp(self): self.confirmed_measures = [1.1, 2.2, 3.3, 4.4] @@ -23,6 +24,7 @@ def testArithmeticMean(self): expected = 2.75 self.assertEqual(obtained, expected) + if __name__ == '__main__': logging.root.setLevel(logging.WARNING) unittest.main() diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py index d2625f6ede..17cf1619f9 100644 --- a/gensim/test/test_atmodel.py +++ b/gensim/test/test_atmodel.py @@ -22,7 +22,6 @@ import six import numpy as np -import scipy.linalg from gensim.corpora import mmcorpus, Dictionary from gensim.models import atmodel @@ -483,10 +482,10 @@ def testLargeMmap(self): model = self.model # simulate storing large arrays separately - model.save(testfile(), sep_limit=0) + model.save(fname, sep_limit=0) # test loading the large model arrays with mmap - model2 = self.class_.load(testfile(), mmap='r') + model2 = self.class_.load(fname, mmap='r') self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(isinstance(model2.expElogbeta, np.memmap)) self.assertTrue(np.allclose(model.expElogbeta, model2.expElogbeta)) diff --git a/gensim/test/test_big.py b/gensim/test/test_big.py index ea3cc7adde..5e6972bd1f 100644 --- a/gensim/test/test_big.py +++ b/gensim/test/test_big.py @@ -12,7 +12,6 @@ import logging import unittest import os -import itertools import tempfile import numpy as np @@ -27,6 +26,7 @@ def testfile(): class BigCorpus(object): """A corpus of a large number of docs & large vocab""" + def __init__(self, words_only=False, num_terms=200000, num_docs=1000000, doc_len=100): self.dictionary = gensim.utils.FakeDict(num_terms) self.words_only = words_only @@ -47,6 +47,7 @@ def __iter__(self): if os.environ.get('GENSIM_BIG', False): class TestLargeData(unittest.TestCase): """Try common operations, using large models. 
You'll need ~8GB RAM to run these tests""" + def testWord2Vec(self): corpus = BigCorpus(words_only=True, num_docs=100000, num_terms=3000000, doc_len=200) model = gensim.models.Word2Vec(corpus, size=300, workers=4) diff --git a/gensim/test/test_corpora_hashdictionary.py b/gensim/test/test_corpora_hashdictionary.py index be19db8e39..6f314d65ee 100644 --- a/gensim/test/test_corpora_hashdictionary.py +++ b/gensim/test/test_corpora_hashdictionary.py @@ -101,7 +101,6 @@ def testDebugMode(self): expected = {} self.assertEqual(d.id2token, expected) - def testRange(self): # all words map to the same id d = HashDictionary(self.texts, id_range=1, debug=True) @@ -121,10 +120,9 @@ def testRange(self): self.assertEqual(d.id2token, id2token) self.assertEqual(d.token2id, token2id) - def testBuild(self): d = HashDictionary(self.texts, myhash=zlib.adler32) - expected = {5232: 2, + expected = {5232: 2, 5798: 3, 10608: 2, 12466: 2, @@ -178,7 +176,6 @@ def test_saveAsTextBz2(self): self.assertEqual(len(d), len(d2)) - if __name__ == '__main__': logging.basicConfig(level=logging.WARNING) unittest.main() diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 1265edc2b7..ad4eb4c976 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -22,7 +22,7 @@ import numpy as np -from gensim import utils, matutils +from gensim import utils from gensim.models import doc2vec, keyedvectors module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder @@ -41,6 +41,7 @@ def __iter__(self): for i, line in enumerate(f): yield doc2vec.TaggedDocument(utils.simple_preprocess(line), [self._tag(i)]) + list_corpus = list(DocsLeeCorpus()) raw_sentences = [ @@ -62,13 +63,15 @@ def testfile(): # temporary data will be stored to this file return os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.tst') + def load_on_instance(): # Save and load a Doc2Vec Model on instance for test model = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1) model.save(testfile()) - model = doc2vec.Doc2Vec() # should fail at this point + model = doc2vec.Doc2Vec() # should fail at this point return model.load(testfile()) + class TestDoc2VecModel(unittest.TestCase): def test_persistence(self): """Test storing/loading the entire model.""" @@ -378,7 +381,7 @@ def testTrainWarning(self, l): def testLoadOnClassError(self): """Test if exception is raised when loading doc2vec model on instance""" self.assertRaises(AttributeError, load_on_instance) -#endclass TestDoc2VecModel +# endclass TestDoc2VecModel if not hasattr(TestDoc2VecModel, 'assertLess'): @@ -397,6 +400,7 @@ class ConcatenatedDoc2Vec(object): Models must have exactly-matching vocabulary and document IDs. (Models should be trained separately; this wrapper just returns concatenated results.) 
""" + def __init__(self, models): self.models = models if hasattr(models[0], 'docvecs'): diff --git a/gensim/test/test_dtm.py b/gensim/test/test_dtm.py index bd99136332..81b48374ab 100644 --- a/gensim/test/test_dtm.py +++ b/gensim/test/test_dtm.py @@ -69,6 +69,7 @@ def testCalledProcessError(self): id2word=self.id2word, model='dtm', initialize_lda=False, rng_seed=1) + if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG) unittest.main() diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py index bf6ac7db98..6a7a7b09ed 100644 --- a/gensim/test/test_fasttext_wrapper.py +++ b/gensim/test/test_fasttext_wrapper.py @@ -225,7 +225,7 @@ def testLoadModelWithNonAsciiVocab(self): model = fasttext.FastText.load_fasttext_format(datapath('non_ascii_fasttext')) self.assertTrue(u'který' in model) try: - vector = model[u'který'] + vector = model[u'který'] # noqa:F841 except UnicodeDecodeError: self.fail('Unable to access vector for utf8 encoded non-ascii word') @@ -234,7 +234,7 @@ def testLoadModelNonUtf8Encoding(self): model = fasttext.FastText.load_fasttext_format(datapath('cp852_fasttext'), encoding='cp852') self.assertTrue(u'který' in model) try: - vector = model[u'který'] + vector = model[u'který'] # noqa:F841 except KeyError: self.fail('Unable to access vector for cp-852 word') diff --git a/gensim/test/test_glove2word2vec.py b/gensim/test/test_glove2word2vec.py index 1e83e93154..b638ca927d 100644 --- a/gensim/test/test_glove2word2vec.py +++ b/gensim/test/test_glove2word2vec.py @@ -9,7 +9,6 @@ import logging import unittest import os -import sys import tempfile import numpy @@ -17,31 +16,33 @@ from gensim.utils import check_output -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + def testfile(): # temporary model will be stored to this file return os.path.join(tempfile.gettempdir(), 'glove2word2vec.test') + class TestGlove2Word2Vec(unittest.TestCase): def setUp(self): self.datapath = datapath('test_glove.txt') self.output_file = testfile() def testConversion(self): - output = check_output(args=['python', '-m', 'gensim.scripts.glove2word2vec', '--input', self.datapath, '--output', self.output_file]) + output = check_output(args=['python', '-m', 'gensim.scripts.glove2word2vec', '--input', self.datapath, '--output', self.output_file]) # noqa:F841 # test that the converted model loads successfully try: self.test_model = gensim.models.KeyedVectors.load_word2vec_format(self.output_file) self.assertTrue(numpy.allclose(self.test_model.n_similarity(['the', 'and'], ['and', 'the']), 1.0)) - except: + except Exception: if os.path.isfile(os.path.join(self.output_file)): self.fail('model file %s was created but could not be loaded.' % self.output_file) else: self.fail('model file %s creation failed, check the parameters and input file format.' 
% self.output_file) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() - diff --git a/gensim/test/test_hdpmodel.py b/gensim/test/test_hdpmodel.py index e2b543687c..647b31ad7e 100644 --- a/gensim/test/test_hdpmodel.py +++ b/gensim/test/test_hdpmodel.py @@ -15,17 +15,13 @@ import os.path import tempfile -import six -import scipy.linalg - from gensim.corpora import mmcorpus, Dictionary from gensim.models import hdpmodel -from gensim import matutils from gensim.test import basetests import numpy as np -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -63,8 +59,8 @@ def testTopicValues(self): prob, word = results[1].split('+')[0].split('*') self.assertEqual(results[0], 0) self.assertEqual(prob, expected_prob) - self.assertEqual(word, expected_word) - + self.assertEqual(word, expected_word) + return def testLDAmodel(self): diff --git a/gensim/test/test_keras_integration.py b/gensim/test/test_keras_integration.py index ced285c77c..7f321c6565 100644 --- a/gensim/test/test_keras_integration.py +++ b/gensim/test/test_keras_integration.py @@ -112,7 +112,7 @@ def testEmbeddingLayer20NewsGroup(self): texts.append(sentence) texts_w2v.append(sentence.split(' ')) labels.append(label_id) - except: + except Exception: None # Vectorize the text samples into a 2D integer tensor diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index 952ba2fadd..8a3be3af5c 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -17,7 +17,6 @@ import unittest from gensim import utils -from gensim.corpora import Dictionary from gensim.summarization import keywords @@ -75,7 +74,6 @@ def test_text_summarization_raises_exception_on_short_input_text(self): self.assertTrue(keywords(text) is not None) - def test_keywords_ratio(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') @@ -89,7 +87,7 @@ def test_keywords_ratio(self): selected_docs_12 = keywords(text, ratio=0.1, split=True) selected_docs_21 = keywords(text, ratio=0.2, split=True) - self.assertAlmostEqual(float(len(selected_docs_21))/len(selected_docs_12), float(21)/12, places=1) + self.assertAlmostEqual(float(len(selected_docs_21)) / len(selected_docs_12), float(21) / 12, places=1) if __name__ == '__main__': diff --git a/gensim/test/test_ldamallet_wrapper.py b/gensim/test/test_ldamallet_wrapper.py index 9fec0e8605..a7d64839bb 100644 --- a/gensim/test/test_ldamallet_wrapper.py +++ b/gensim/test/test_ldamallet_wrapper.py @@ -15,10 +15,7 @@ import os.path import tempfile -import six import numpy as np -import scipy.linalg - from gensim.corpora import mmcorpus, Dictionary from gensim.models.wrappers import ldamallet @@ -26,7 +23,7 @@ from gensim.models import ldamodel from gensim.test import basetests -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) # set up vars used in testing ("Deerwester" from the web tutorial) @@ -44,11 +41,11 @@ corpus = [dictionary.doc2bow(text) for text in texts] - def testfile(): # temporary data will be stored to this file return 
os.path.join(tempfile.gettempdir(), 'gensim_models.tst') + class TestLdaMallet(unittest.TestCase, basetests.TestBaseTopicModel): def setUp(self): mallet_home = os.environ.get('MALLET_HOME', None) @@ -60,40 +57,38 @@ def setUp(self): # self.model is used in TestBaseTopicModel self.model = ldamallet.LdaMallet(self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=1) - def testTransform(self): if not self.mallet_path: return passed = False - for i in range(5): # restart at most 5 times + for i in range(5): # restart at most 5 times # create the transformation model model = ldamallet.LdaMallet(self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=200) # transform one document doc = list(corpus)[0] transformed = model[doc] - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests expected = [0.49, 0.51] - passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering + passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering if passed: break logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % (i, sorted(vec), sorted(expected))) self.assertTrue(passed) - def testSparseTransform(self): if not self.mallet_path: return passed = False - for i in range(5): # restart at most 5 times + for i in range(5): # restart at most 5 times # create the sparse transformation model with the appropriate topic_threshold model = ldamallet.LdaMallet(self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=200, topic_threshold=0.5) # transform one document doc = list(corpus)[0] transformed = model[doc] - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests expected = [1.0, 0.0] - passed = np.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering + passed = np.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering if passed: break logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % @@ -103,7 +98,7 @@ def testSparseTransform(self): def testMallet2Model(self): if not self.mallet_path: return - passed = False + tm1 = ldamallet.LdaMallet(self.mallet_path, corpus=corpus, num_topics=2, id2word=dictionary) tm2 = ldamallet.malletmodel2ldamodel(tm1) for document in corpus: @@ -129,7 +124,7 @@ def testPersistence(self): self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(np.allclose(model.word_topics, model2.word_topics)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testPersistenceCompressed(self): if not self.mallet_path: @@ -141,7 +136,7 @@ def testPersistenceCompressed(self): self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(np.allclose(model.word_topics, model2.word_topics)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testLargeMmap(self): if not self.mallet_path: @@ -150,15 +145,15 @@ def 
testLargeMmap(self): model = ldamallet.LdaMallet(self.mallet_path, self.corpus, num_topics=2, iterations=100) # simulate storing large arrays separately - model.save(testfile(), sep_limit=0) + model.save(fname, sep_limit=0) # test loading the large model arrays with mmap - model2 = ldamodel.LdaModel.load(testfile(), mmap='r') + model2 = ldamodel.LdaModel.load(fname, mmap='r') self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(isinstance(model2.word_topics, np.memmap)) self.assertTrue(np.allclose(model.word_topics, model2.word_topics)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testLargeMmapCompressed(self): if not self.mallet_path: @@ -171,9 +166,9 @@ def testLargeMmapCompressed(self): # test loading the large model arrays with mmap self.assertRaises(IOError, ldamodel.LdaModel.load, fname, mmap='r') -#endclass TestLdaMallet +# endclass TestLdaMallet + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() - diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 22d9ba530a..71cc10f70c 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -18,14 +18,13 @@ import six import numpy as np -import scipy.linalg from gensim.corpora import mmcorpus, Dictionary from gensim.models import ldamodel, ldamulticore from gensim import matutils, utils from gensim.test import basetests -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -61,13 +60,12 @@ def setUp(self): self.class_ = ldamodel.LdaModel self.model = self.class_(corpus, id2word=dictionary, num_topics=2, passes=100) - def testTransform(self): passed = False # sometimes, LDA training gets stuck at a local minimum # in that case try re-training the model from scratch, hoping for a # better random initialization - for i in range(25): # restart at most 5 times + for i in range(25): # restart at most 5 times # create the transformation model model = self.class_(id2word=dictionary, num_topics=2, passes=100) model.update(self.corpus) @@ -76,9 +74,9 @@ def testTransform(self): doc = list(corpus)[0] transformed = model[doc] - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests expected = [0.13, 0.87] - passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering + passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering if passed: break logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % @@ -146,7 +144,6 @@ def testAlpha(self): kwargs['alpha'] = "gensim is cool" self.assertRaises(ValueError, self.class_, **kwargs) - def testEtaAuto(self): model1 = self.class_(corpus, id2word=dictionary, eta='symmetric', passes=10) modelauto = self.class_(corpus, id2word=dictionary, eta='auto', passes=10) @@ -193,7 +190,7 @@ def testEta(self): self.assertEqual(model.eta.shape, expected_shape) self.assertTrue(all(model.eta == np.array([0.3] * num_terms))) - # 
should be ok with num_topics x num_terms + # should be ok with num_topics x num_terms testeta = np.array([[0.5] * len(dictionary)] * 2) kwargs['eta'] = testeta self.class_(**kwargs) @@ -234,7 +231,7 @@ def testGetTopicTerms(self): def testGetDocumentTopics(self): - model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes= 100, random_state=np.random.seed(0)) + model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100, random_state=np.random.seed(0)) doc_topics = model.get_document_topics(self.corpus) @@ -244,26 +241,26 @@ def testGetDocumentTopics(self): self.assertTrue(isinstance(k, int)) self.assertTrue(isinstance(v, float)) - #Test case to use the get_document_topic function for the corpus + # Test case to use the get_document_topic function for the corpus all_topics = model.get_document_topics(self.corpus, per_word_topics=True) self.assertEqual(model.state.numdocs, len(corpus)) for topic in all_topics: self.assertTrue(isinstance(topic, tuple)) - for k, v in topic[0]: # list of doc_topics + for k, v in topic[0]: # list of doc_topics self.assertTrue(isinstance(k, int)) self.assertTrue(isinstance(v, float)) - for w, topic_list in topic[1]: # list of word_topics + for w, topic_list in topic[1]: # list of word_topics self.assertTrue(isinstance(w, int)) self.assertTrue(isinstance(topic_list, list)) - for w, phi_values in topic[2]: # list of word_phis + for w, phi_values in topic[2]: # list of word_phis self.assertTrue(isinstance(w, int)) self.assertTrue(isinstance(phi_values, list)) - #Test case to check the filtering effect of minimum_probability and minimum_phi_value + # Test case to check the filtering effect of minimum_probability and minimum_phi_value doc_topic_count_na = 0 word_phi_count_na = 0 @@ -273,25 +270,24 @@ def testGetDocumentTopics(self): for topic in all_topics: self.assertTrue(isinstance(topic, tuple)) - for k, v in topic[0]: # list of doc_topics + for k, v in topic[0]: # list of doc_topics self.assertTrue(isinstance(k, int)) self.assertTrue(isinstance(v, float)) if len(topic[0]) != 0: doc_topic_count_na += 1 - for w, topic_list in topic[1]: # list of word_topics + for w, topic_list in topic[1]: # list of word_topics self.assertTrue(isinstance(w, int)) self.assertTrue(isinstance(topic_list, list)) - for w, phi_values in topic[2]: # list of word_phis + for w, phi_values in topic[2]: # list of word_phis self.assertTrue(isinstance(w, int)) self.assertTrue(isinstance(phi_values, list)) if len(phi_values) != 0: word_phi_count_na += 1 self.assertTrue(model.state.numdocs > doc_topic_count_na) - self.assertTrue( sum([len(i) for i in corpus]) > word_phi_count_na) - + self.assertTrue(sum([len(i) for i in corpus]) > word_phi_count_na) doc_topics, word_topics, word_phis = model.get_document_topics(self.corpus[1], per_word_topics=True) @@ -309,8 +305,9 @@ def testGetDocumentTopics(self): # word_topics looks like this: ({word_id => [topic_id_most_probable, topic_id_second_most_probable, ...]). # we check one case in word_topics, i.e of the first word in the doc, and it's likely topics. 
- expected_word = 0 + # FIXME: Fails on osx and win + # expected_word = 0 # self.assertEqual(word_topics[0][0], expected_word) # self.assertTrue(0 in word_topics[0][1]) @@ -325,9 +322,8 @@ def testTermTopics(self): self.assertTrue(isinstance(probability, float)) # checks if topic '1' is in the result list - # FIXME: Fails on osx and win - # self.assertTrue(1 in result[0]) - + # FIXME: Fails on osx and win + # self.assertTrue(1 in result[0]) # if user has entered word instead, check with word result = model.get_term_topics(str(model.id2word[2])) @@ -336,9 +332,8 @@ def testTermTopics(self): self.assertTrue(isinstance(probability, float)) # checks if topic '1' is in the result list - # FIXME: Fails on osx and win - # self.assertTrue(1 in result[0]) - + # FIXME: Fails on osx and win + # self.assertTrue(1 in result[0]) def testPasses(self): # long message includes the original error message with a custom one @@ -407,7 +402,7 @@ def testPersistence(self): self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(np.allclose(model.expElogbeta, model2.expElogbeta)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testModelCompatibilityWithPythonVersions(self): fname_model_2_7 = datapath('ldamodel_python_2_7') @@ -417,12 +412,11 @@ def testModelCompatibilityWithPythonVersions(self): self.assertEqual(model_2_7.num_topics, model_3_5.num_topics) self.assertTrue(np.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) tstvec = [] - self.assertTrue(np.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector - id2word_2_7 = dict((k,v) for k,v in model_2_7.id2word.iteritems()) - id2word_3_5 = dict((k,v) for k,v in model_3_5.id2word.iteritems()) + self.assertTrue(np.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector + id2word_2_7 = dict((k, v) for k, v in model_2_7.id2word.iteritems()) + id2word_3_5 = dict((k, v) for k, v in model_3_5.id2word.iteritems()) self.assertEqual(set(id2word_2_7.keys()), set(id2word_3_5.keys())) - def testPersistenceIgnore(self): fname = testfile('testPersistenceIgnore') model = ldamodel.LdaModel(self.corpus, num_topics=2) @@ -442,22 +436,22 @@ def testPersistenceCompressed(self): self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(np.allclose(model.expElogbeta, model2.expElogbeta)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testLargeMmap(self): fname = testfile() model = self.model # simulate storing large arrays separately - model.save(testfile(), sep_limit=0) + model.save(fname, sep_limit=0) # test loading the large model arrays with mmap - model2 = self.class_.load(testfile(), mmap='r') + model2 = self.class_.load(fname, mmap='r') self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(isinstance(model2.expElogbeta, np.memmap)) self.assertTrue(np.allclose(model.expElogbeta, model2.expElogbeta)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testLargeMmapCompressed(self): fname = testfile() + '.gz' @@ -493,7 +487,7 @@ def testRandomStateBackwardCompatibility(self): self.assertTrue(isinstance(i[0], int)) 
self.assertTrue(isinstance(i[1], six.string_types)) -#endclass TestLdaModel +# endclass TestLdaModel class TestLdaMulticore(TestLdaModel): @@ -507,7 +501,7 @@ def testAlphaAuto(self): self.assertRaises(RuntimeError, self.class_, alpha='auto') -#endclass TestLdaMulticore +# endclass TestLdaMulticore if __name__ == '__main__': diff --git a/gensim/test/test_ldaseqmodel.py b/gensim/test/test_ldaseqmodel.py index d2924921e6..4ca7b104fc 100644 --- a/gensim/test/test_ldaseqmodel.py +++ b/gensim/test/test_ldaseqmodel.py @@ -5,19 +5,19 @@ """ import numpy as np # for arrays, array broadcasting etc. -from gensim.models import ldaseqmodel, ldamodel +from gensim.models import ldaseqmodel from gensim.corpora import Dictionary import os.path import unittest import logging -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data/DTM', fname) class TestLdaSeq(unittest.TestCase): - # we are setting up a DTM model and fitting it, and checking topic-word and doc-topic results. + # we are setting up a DTM model and fitting it, and checking topic-word and doc-topic results. def setUp(self): texts = [ [u'senior', u'studios', u'studios', u'studios', u'creators', u'award', u'mobile', u'currently', u'challenges', u'senior', u'summary', u'senior', u'motivated', u'creative', u'senior'], @@ -56,7 +56,7 @@ def setUp(self): sstats = np.loadtxt(datapath('sstats_test.txt')) dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] - self.ldaseq = ldaseqmodel.LdaSeqModel(corpus = corpus , id2word= dictionary, num_topics=2, time_slice=[10, 10, 11], initialize='own', sstats=sstats) + self.ldaseq = ldaseqmodel.LdaSeqModel(corpus=corpus, id2word=dictionary, num_topics=2, time_slice=[10, 10, 11], initialize='own', sstats=sstats) # testing topic word proportions def testTopicWord(self): @@ -72,6 +72,7 @@ def testDocTopic(self): expected_doc_topic = 0.00066577896138482028 self.assertAlmostEqual(doc_topic[0], expected_doc_topic, places=2) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() diff --git a/gensim/test/test_ldavowpalwabbit_wrapper.py b/gensim/test/test_ldavowpalwabbit_wrapper.py index 74e3591f4f..3cf6f9f6bb 100644 --- a/gensim/test/test_ldavowpalwabbit_wrapper.py +++ b/gensim/test/test_ldavowpalwabbit_wrapper.py @@ -27,7 +27,7 @@ from gensim.models.wrappers.ldavowpalwabbit import LdaVowpalWabbit -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -69,7 +69,7 @@ def setUp(self): def test_save_load(self): """Test loading/saving LdaVowpalWabbit model.""" - if not self.vw_path: # for python 2.6 + if not self.vw_path: # for python 2.6 return lda = LdaVowpalWabbit(self.vw_path, corpus=self.corpus, @@ -104,7 +104,7 @@ def test_save_load(self): def test_model_update(self): """Test updating existing LdaVowpalWabbit model.""" - if not self.vw_path: # for python 2.6 + if not self.vw_path: # for python 2.6 return lda = LdaVowpalWabbit(self.vw_path, corpus=[self.corpus[0]], @@ -124,7 +124,7 @@ def test_model_update(self): def test_perplexity(self): """Test LdaVowpalWabbit 
perplexity is within expected range.""" - if not self.vw_path: # for python 2.6 + if not self.vw_path: # for python 2.6 return lda = LdaVowpalWabbit(self.vw_path, corpus=self.corpus, @@ -144,7 +144,7 @@ def test_perplexity(self): def test_topic_coherence(self): """Test LdaVowpalWabbit topic coherence.""" - if not self.vw_path: # for python 2.6 + if not self.vw_path: # for python 2.6 return corpus, dictionary = get_corpus() lda = LdaVowpalWabbit(self.vw_path, @@ -196,7 +196,7 @@ def test_topic_coherence(self): def test_corpus_to_vw(self): """Test corpus to Vowpal Wabbit format conversion.""" - if not self.vw_path: # for python 2.6 + if not self.vw_path: # for python 2.6 return corpus = [[(0, 5), (7, 1), (5, 3), (0, 2)], [(7, 2), (2, 1), (3, 11)], diff --git a/gensim/test/test_lee.py b/gensim/test/test_lee.py index 7a6dbab498..b51101c8b1 100644 --- a/gensim/test/test_lee.py +++ b/gensim/test/test_lee.py @@ -65,7 +65,6 @@ def setUp(self): sim_m_size = np.shape(sim_matrix)[0] human_sim_vector = sim_matrix[matutils.triu_indices(sim_m_size, 1)] - def test_corpus(self): """availability and integrity of corpus""" documents_in_bg_corpus = 300 @@ -75,7 +74,6 @@ def test_corpus(self): self.assertEqual(len(corpus), documents_in_corpus) self.assertEqual(len(human_sim_vector), len_sim_vector) - def test_lee(self): """correlation with human data > 0.6 (this is the value which was achieved in the original paper) @@ -108,7 +106,6 @@ def test_lee(self): logging.info("LSI correlation coefficient is %s" % cor) self.assertTrue(cor > 0.6) - # def test_lee_mallet(self): # global bg_corpus, corpus, bg_corpus2, corpus2 @@ -134,7 +131,6 @@ def test_lee(self): # self.assertTrue(cor > 0.35) - if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() diff --git a/gensim/test/test_logentropy_model.py b/gensim/test/test_logentropy_model.py index 08d4e6e713..07f982dad9 100644 --- a/gensim/test/test_logentropy_model.py +++ b/gensim/test/test_logentropy_model.py @@ -15,15 +15,12 @@ import os.path import tempfile -import six import numpy as np -import scipy.linalg from gensim.corpora import mmcorpus, Dictionary from gensim.models import logentropy_model -from gensim import matutils -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -51,7 +48,6 @@ def setUp(self): self.corpus_small = mmcorpus.MmCorpus(datapath('test_corpus_small.mm')) self.corpus_ok = mmcorpus.MmCorpus(datapath('test_corpus_ok.mm')) - def testTransform(self): # create the transformation model model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=False) @@ -65,7 +61,6 @@ def testTransform(self): (3, 1.20941755462856)] self.assertTrue(np.allclose(transformed, expected)) - def testPersistence(self): fname = testfile() model = logentropy_model.LogEntropyModel(self.corpus_ok, normalize=True) @@ -83,7 +78,7 @@ def testPersistenceCompressed(self): self.assertTrue(model.entr == model2.entr) tstvec = [] self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) -#endclass TestLogEntropyModel +# endclass TestLogEntropyModel if __name__ == '__main__': diff --git a/gensim/test/test_lsimodel.py b/gensim/test/test_lsimodel.py index 012ac6b8f5..e2c32bda66 100644 --- a/gensim/test/test_lsimodel.py +++ b/gensim/test/test_lsimodel.py @@ -76,11 +76,11 @@ 
def testCorpusTransform(self): model = self.model got = np.vstack(matutils.sparse2full(doc, 2) for doc in model[self.corpus]) expected = np.array([ - [0.65946639, 0.14211544], + [0.65946639, 0.14211544], [2.02454305, -0.42088759], - [1.54655361, 0.32358921], - [1.81114125, 0.5890525 ], - [0.9336738 , -0.27138939], + [1.54655361, 0.32358921], + [1.81114125, 0.5890525], + [0.9336738, -0.27138939], [0.01274618, -0.49016181], [0.04888203, -1.11294699], [0.08063836, -1.56345594], diff --git a/gensim/test/test_miislita.py b/gensim/test/test_miislita.py index e698cae444..dd660f629f 100644 --- a/gensim/test/test_miislita.py +++ b/gensim/test/test_miislita.py @@ -71,7 +71,6 @@ def test_textcorpus(self): miislita2 = corpora.MmCorpus(ftmp) self.assertEqual(list(miislita), list(miislita2)) - def test_save_load_ability(self): """ Make sure we can save and load (un/pickle) TextCorpus objects (as long @@ -90,7 +89,6 @@ def test_save_load_ability(self): self.assertEqual(len(miislita), len(miislita2)) self.assertEqual(miislita.dictionary.token2id, miislita2.dictionary.token2id) - def test_miislita_high_level(self): # construct corpus from file miislita = CorpusMiislita(datapath('miIslita.cor')) diff --git a/gensim/test/test_normmodel.py b/gensim/test/test_normmodel.py index 8d1315e75d..77221b4a4d 100644 --- a/gensim/test/test_normmodel.py +++ b/gensim/test/test_normmodel.py @@ -22,7 +22,7 @@ from gensim.corpora import mmcorpus from gensim.models import normmodel -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -136,7 +136,7 @@ def testPersistence(self): model2 = normmodel.NormModel.load(fname) self.assertTrue(model.norms == model2.norms) tstvec = [] - self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec))) # try projecting an empty vector + self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec))) # try projecting an empty vector def testPersistenceCompressed(self): fname = testfile() + '.gz' @@ -145,7 +145,7 @@ def testPersistenceCompressed(self): model2 = normmodel.NormModel.load(fname, mmap=None) self.assertTrue(model.norms == model2.norms) tstvec = [] - self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec))) # try projecting an empty vector + self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec))) # try projecting an empty vector if __name__ == '__main__': diff --git a/gensim/test/test_parsing.py b/gensim/test/test_parsing.py index 35dc9efabf..fd0429e9f6 100644 --- a/gensim/test/test_parsing.py +++ b/gensim/test/test_parsing.py @@ -8,8 +8,7 @@ import logging import unittest import numpy as np - -from gensim.parsing.preprocessing import * +from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation2, strip_tags, strip_short, strip_numeric, strip_non_alphanum, strip_multiple_whitespaces, split_alphanum, stem_text # several documents @@ -43,7 +42,6 @@ classes = np.array([[1, 0], [1, 0], [0, 1], [0, 1]]) - class TestPreprocessing(unittest.TestCase): def testStripNumeric(self): diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py index 688f92dbd0..5397d6e4c3 100644 --- a/gensim/test/test_phrases.py +++ b/gensim/test/test_phrases.py @@ -33,7 +33,7 @@ ['graph', 'trees'], ['graph', 'minors', 'trees'], ['graph', 'minors', 'survey'], - ['graph', 
'minors', 'survey','human','interface'] #test bigrams within same sentence + ['graph', 'minors', 'survey', 'human', 'interface'] # test bigrams within same sentence ] unicode_sentences = [[utils.to_unicode(w) for w in sentence] for sentence in sentences] @@ -44,6 +44,7 @@ def gen_sentences(): class TestPhrasesCommon(unittest.TestCase): """ Tests that need to be run for both Prases and Phraser classes.""" + def setUp(self): self.bigram = Phrases(sentences, min_count=1, threshold=1) self.bigram_default = Phrases(sentences) @@ -195,11 +196,12 @@ def testPruning(self): """Test that max_vocab_size parameter is respected.""" bigram = Phrases(sentences, max_vocab_size=5) self.assertTrue(len(bigram.vocab) <= 5) -#endclass TestPhrasesModel +# endclass TestPhrasesModel class TestPhraserModel(TestPhrasesCommon): """ Test Phraser models.""" + def setUp(self): """Set up Phraser models for the tests.""" bigram_phrases = Phrases(sentences, min_count=1, threshold=1) diff --git a/gensim/test/test_rpmodel.py b/gensim/test/test_rpmodel.py index 2abbc745dd..2de5dd6546 100644 --- a/gensim/test/test_rpmodel.py +++ b/gensim/test/test_rpmodel.py @@ -15,15 +15,13 @@ import os.path import tempfile -import six import numpy as np -import scipy.linalg from gensim.corpora import mmcorpus, Dictionary from gensim.models import rpmodel from gensim import matutils -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -46,24 +44,22 @@ def testfile(): return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') - class TestRpModel(unittest.TestCase): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) def testTransform(self): # create the transformation model - np.random.seed(13) # HACK; set fixed seed so that we always get the same random matrix (and can compare against expected results) + np.random.seed(13) # HACK; set fixed seed so that we always get the same random matrix (and can compare against expected results) model = rpmodel.RpModel(self.corpus, num_topics=2) # transform one document doc = list(self.corpus)[0] transformed = model[doc] - vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests + vec = matutils.sparse2full(transformed, 2) # convert to dense vector, for easier equality tests expected = np.array([-0.70710677, 0.70710677]) - self.assertTrue(np.allclose(vec, expected)) # transformed entries must be equal up to sign - + self.assertTrue(np.allclose(vec, expected)) # transformed entries must be equal up to sign def testPersistence(self): fname = testfile() @@ -73,7 +69,7 @@ def testPersistence(self): self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(np.allclose(model.projection, model2.projection)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testPersistenceCompressed(self): fname = testfile() + '.gz' @@ -83,8 +79,8 @@ def testPersistenceCompressed(self): self.assertEqual(model.num_topics, model2.num_topics) self.assertTrue(np.allclose(model.projection, model2.projection)) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector -#endclass TestRpModel + 
self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector +# endclass TestRpModel if __name__ == '__main__': diff --git a/gensim/test/test_segmentation.py b/gensim/test/test_segmentation.py index c437ca8fa8..a4c9356a26 100644 --- a/gensim/test/test_segmentation.py +++ b/gensim/test/test_segmentation.py @@ -49,6 +49,7 @@ def testSOneSet(self): self.assertEqual(actual[s_i][j][0], expected[s_i][j][0]) self.assertTrue(np.allclose(actual[s_i][j][1], expected[s_i][j][1])) + if __name__ == '__main__': logging.root.setLevel(logging.WARNING) unittest.main() diff --git a/gensim/test/test_sharded_corpus.py b/gensim/test/test_sharded_corpus.py index d50f618384..871048ea4e 100644 --- a/gensim/test/test_sharded_corpus.py +++ b/gensim/test/test_sharded_corpus.py @@ -18,10 +18,8 @@ ############################################################################# - class TestShardedCorpus(unittest.TestCase): - # @classmethod # def setUpClass(cls): # cls.dim = 1000 @@ -73,7 +71,7 @@ def test_load(self): def test_getitem(self): - _ = self.corpus[130] + _ = self.corpus[130] # noqa:F841 # Does retrieving the item load the correct shard? self.assertEqual(self.corpus.current_shard_n, 1) @@ -83,13 +81,13 @@ def test_getitem(self): self.assertEqual(self.corpus.current_shard_n, 2) for i in xrange(220, 227): - self.assertTrue(np.array_equal(self.corpus[i], item[i-220])) + self.assertTrue(np.array_equal(self.corpus[i], item[i - 220])) def test_sparse_serialization(self): no_exception = True try: - dataset = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, + dataset = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, # noqa:F841 dim=self.dim, sparse_serialization=True) except Exception: no_exception = False @@ -236,7 +234,6 @@ def test_getitem_dense2gensim(self): i, j, str(ilist[i][j]), i, j, str(dslice[i][j]))) - iscorp, _ = is_corpus(ilist) self.assertTrue(iscorp, "Is the object returned by list notation " "a gensim corpus?") @@ -257,6 +254,7 @@ def test_resize(self): ############################################################################## + if __name__ == '__main__': suite = unittest.TestSuite() loader = unittest.TestLoader() diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index a7a59c77d3..9ce5263df7 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -17,21 +17,21 @@ import numpy import scipy -from gensim.corpora import mmcorpus, Dictionary +from gensim.corpora import Dictionary from gensim.models import word2vec from gensim.models import doc2vec from gensim.models import KeyedVectors from gensim.models.wrappers import fasttext -from gensim import matutils, utils, similarities +from gensim import matutils, similarities from gensim.models import Word2Vec try: - from pyemd import emd + from pyemd import emd # noqa:F401 PYEMD_EXT = True except ImportError: PYEMD_EXT = False -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -61,6 +61,7 @@ class _TestSimilarityABC(object): """ Base class for SparseMatrixSimilarity and MatrixSimilarity unit tests. 
""" + def testFull(self, num_best=None, shardsize=100): if self.cls == similarities.Similarity: index = self.cls(None, corpus, num_features=len(dictionary), shardsize=shardsize) @@ -83,18 +84,17 @@ def testFull(self, num_best=None, shardsize=100): index.num_best = num_best query = corpus[0] sims = index[query] - expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)][ : num_best] + expected = [(0, 0.99999994), (2, 0.28867513), (3, 0.23570226), (1, 0.23570226)][: num_best] # convert sims to full numpy arrays, so we can use allclose() and ignore # ordering of items with the same similarity value expected = matutils.sparse2full(expected, len(index)) - if num_best is not None: # when num_best is None, sims is already a numpy array + if num_best is not None: # when num_best is None, sims is already a numpy array sims = matutils.sparse2full(sims, len(index)) self.assertTrue(numpy.allclose(expected, sims)) if self.cls == similarities.Similarity: index.destroy() - def testNumBest(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: @@ -126,7 +126,6 @@ def test_scipy2scipy_clipped(self): self.assertTrue(scipy.sparse.issparse(matrix_scipy_clipped)) self.assertTrue([matutils.scipy2sparse(x) for x in matrix_scipy_clipped], [expected] * 3) - def testChunking(self): if self.cls == similarities.Similarity: index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) @@ -135,9 +134,9 @@ def testChunking(self): query = corpus[:3] sims = index[query] expected = numpy.array([ - [0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0 ], - [0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226 ], - [0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0 ] + [0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226], + [0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0] ], dtype=numpy.float32) self.assertTrue(numpy.allclose(expected, sims)) @@ -151,7 +150,6 @@ def testChunking(self): if self.cls == similarities.Similarity: index.destroy() - def testIter(self): if self.cls == similarities.Similarity: index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) @@ -159,21 +157,20 @@ def testIter(self): index = self.cls(corpus, num_features=len(dictionary)) sims = [sim for sim in index] expected = numpy.array([ - [ 0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0 ], - [ 0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226 ], - [ 0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0 ], - [ 0.23570226, 0.33333334, 0.61237246, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], - [ 0.0, 0.70710677, 0.28867513, 0.0, 0.99999994, 0.0, 0.0, 0.0, 0.0 ], - [ 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.70710677, 0.57735026, 0.0 ], - [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.99999994, 0.81649655, 0.40824828 ], - [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.81649655, 0.99999994, 0.66666663 ], - [ 0.0, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.40824828, 0.66666663, 0.99999994 ] + [0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226], + [0.28867513, 0.40824831, 1.0, 0.61237246, 0.28867513, 0.0, 0.0, 0.0, 0.0], + [0.23570226, 0.33333334, 0.61237246, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.70710677, 0.28867513, 0.0, 0.99999994, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 
0.0, 0.0, 0.0, 1.0, 0.70710677, 0.57735026, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.99999994, 0.81649655, 0.40824828], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.81649655, 0.99999994, 0.66666663], + [0.0, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.40824828, 0.66666663, 0.99999994] ], dtype=numpy.float32) self.assertTrue(numpy.allclose(expected, sims)) if self.cls == similarities.Similarity: index.destroy() - def testPersistency(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: return @@ -278,7 +275,6 @@ def testLargeCompressed(self): self.assertTrue(numpy.allclose(index.index, index2.index)) self.assertEqual(index.num_best, index2.num_best) - def testMmap(self): if self.cls == similarities.WmdSimilarity and not PYEMD_EXT: return @@ -324,10 +320,12 @@ def testMmapCompressed(self): # same thing, but use mmap to load arrays self.assertRaises(IOError, self.cls.load, fname, mmap='r') + class TestMatrixSimilarity(unittest.TestCase, _TestSimilarityABC): def setUp(self): self.cls = similarities.MatrixSimilarity + class TestWmdSimilarity(unittest.TestCase, _TestSimilarityABC): def setUp(self): self.cls = similarities.WmdSimilarity @@ -437,7 +435,6 @@ def testMaintainSparsityWithNumBest(self): self.assertEqual(dense_topn_sims, [matutils.scipy2sparse(v) for v in scipy_topn_sims]) - class TestSimilarity(unittest.TestCase, _TestSimilarityABC): def setUp(self): self.cls = similarities.Similarity @@ -450,7 +447,7 @@ def testSharding(self): def testReopen(self): """test re-opening partially full shards""" index = similarities.Similarity(None, corpus[:5], num_features=len(dictionary), shardsize=9) - _ = index[corpus[0]] # forces shard close + _ = index[corpus[0]] # noqa:F841 forces shard close index.add_documents(corpus[5:]) query = corpus[0] sims = index[query] @@ -464,7 +461,6 @@ def testMmapCompressed(self): # turns out this test doesn't exercise this because there are no arrays # to be mmaped! 
- def testChunksize(self): index = self.cls(None, corpus, num_features=len(dictionary), shardsize=5) expected = [sim for sim in index] @@ -478,7 +474,7 @@ class TestWord2VecAnnoyIndexer(unittest.TestCase): def setUp(self): try: - import annoy + import annoy # noqa:F401 except ImportError: raise unittest.SkipTest("Annoy library is not available") @@ -565,11 +561,12 @@ def assertLoadedIndexEqual(self, index, model): self.assertEqual(index.labels, index2.labels) self.assertEqual(index.num_trees, index2.num_trees) + class TestDoc2VecAnnoyIndexer(unittest.TestCase): def setUp(self): try: - import annoy + import annoy # noqa:F401 except ImportError: raise unittest.SkipTest("Annoy library is not available") diff --git a/gensim/test/test_similarity_metrics.py b/gensim/test/test_similarity_metrics.py index 858e388e77..23bf0e60f7 100644 --- a/gensim/test/test_similarity_metrics.py +++ b/gensim/test/test_similarity_metrics.py @@ -20,7 +20,7 @@ from gensim.corpora import mmcorpus, Dictionary from gensim.models import ldamodel -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) # set up vars used in testing ("Deerwester" from the web tutorial) @@ -37,7 +37,6 @@ corpus = [dictionary.doc2bow(text) for text in texts] - class TestIsBow(unittest.TestCase): def test_None(self): # test None @@ -64,7 +63,7 @@ def test_bow(self): potentialbow = [] result = matutils.isbow(potentialbow) expected = True - self.assertEqual(expected, result) + self.assertEqual(expected, result) # checking corpus; should return false potentialbow = [[(2, 1), (3, 1), (4, 1), (5, 1), (1, 1), (7, 1)]] @@ -85,11 +84,12 @@ def test_bow(self): self.assertEqual(expected, result) # checking np array format bag of words - potentialbow = np.array([[1, 0.4], [0, 0.2],[2, 0.2]]) + potentialbow = np.array([[1, 0.4], [0, 0.2], [2, 0.2]]) result = matutils.isbow(potentialbow) expected = True self.assertEqual(expected, result) + class TestHellinger(unittest.TestCase): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) @@ -128,7 +128,6 @@ def test_distributions(self): expected = 0.185241936534 self.assertAlmostEqual(expected, result) - # checking ndarray, csr_matrix as inputs vec_1 = np.array([[1, 0.3], [0, 0.4], [2, 0.3]]) vec_2 = csr_matrix([[1, 0.4], [0, 0.2], [2, 0.2]]) @@ -143,15 +142,16 @@ def test_distributions(self): expected = 0.309742984153 self.assertAlmostEqual(expected, result) - # testing LDA distribution vectors + # testing LDA distribution vectors np.random.seed(0) - model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes= 100) + model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100) lda_vec1 = model[[(1, 2), (2, 3)]] lda_vec2 = model[[(2, 2), (1, 3)]] result = matutils.hellinger(lda_vec1, lda_vec2) expected = 1.0406845281146034e-06 self.assertAlmostEqual(expected, result) + class TestKL(unittest.TestCase): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) @@ -212,20 +212,21 @@ def test_distributions(self): # testing LDA distribution vectors np.random.seed(0) - model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes= 100) + model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100) lda_vec1 = model[[(1, 2), (2, 3)]] lda_vec2 = model[[(2, 2), (1, 3)]] result = 
matutils.kullback_leibler(lda_vec1, lda_vec2) expected = 4.283407e-12 self.assertAlmostEqual(expected, result, places=5) + class TestJaccard(unittest.TestCase): def test_inputs(self): # all empty inputs will give a divide by zero exception vec_1 = [] vec_2 = [] - self.assertRaises(ZeroDivisionError, matutils.jaccard , vec_1, vec_2) + self.assertRaises(ZeroDivisionError, matutils.jaccard, vec_1, vec_2) def test_distributions(self): @@ -250,6 +251,7 @@ def test_distributions(self): expected = 1 - 0.333333333333 self.assertAlmostEqual(expected, result) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() diff --git a/gensim/test/test_sklearn_api.py b/gensim/test/test_sklearn_api.py index 997b3d9fb1..ade44e900a 100644 --- a/gensim/test/test_sklearn_api.py +++ b/gensim/test/test_sklearn_api.py @@ -966,5 +966,6 @@ def testModelNotFitted(self): phrases_transformer = PhrasesTransformer() self.assertRaises(NotFittedError, phrases_transformer.transform, phrases_sentences[0]) + if __name__ == '__main__': unittest.main() diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py index a1ba6f223a..ce44c5f0a3 100644 --- a/gensim/test/test_summarization.py +++ b/gensim/test/test_summarization.py @@ -87,7 +87,7 @@ def test_text_summarization_raises_exception_on_short_input_text(self): text = "\n".join(text.split('\n')[:8]) self.assertTrue(summarize(text) is not None) - + def test_text_summarization_returns_input_on_single_input_sentence(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') @@ -107,7 +107,7 @@ def test_corpus_summarization_raises_exception_on_short_input_text(self): # Keeps the first 8 sentences to make the text shorter. sentences = text.split('\n')[:8] - + # Generate the corpus. tokens = [sentence.split() for sentence in sentences] dictionary = Dictionary(tokens) @@ -185,6 +185,7 @@ def test_low_distinct_words_corpus_summarization_is_none(self): self.assertTrue(summarize_corpus(corpus) is None) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() diff --git a/gensim/test/test_tfidfmodel.py b/gensim/test/test_tfidfmodel.py index e7dc62e960..bb00b5482d 100644 --- a/gensim/test/test_tfidfmodel.py +++ b/gensim/test/test_tfidfmodel.py @@ -15,15 +15,12 @@ import os.path import tempfile -import six import numpy as np -import scipy.linalg from gensim.corpora import mmcorpus, Dictionary from gensim.models import tfidfmodel -from gensim import matutils -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -46,7 +43,6 @@ def testfile(): return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') - class TestTfidfModel(unittest.TestCase): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) @@ -62,7 +58,6 @@ def testTransform(self): expected = [(0, 0.57735026918962573), (1, 0.57735026918962573), (2, 0.57735026918962573)] self.assertTrue(np.allclose(transformed, expected)) - def testInit(self): # create the transformation model by analyzing a corpus # uses the global `corpus`! 
@@ -77,7 +72,6 @@ def testInit(self): model2 = tfidfmodel.TfidfModel(dictionary=dictionary) self.assertEqual(model1.idfs, model2.idfs) - def testPersistence(self): fname = testfile() model = tfidfmodel.TfidfModel(self.corpus, normalize=True) @@ -85,7 +79,7 @@ def testPersistence(self): model2 = tfidfmodel.TfidfModel.load(fname) self.assertTrue(model.idfs == model2.idfs) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector def testPersistenceCompressed(self): fname = testfile() + '.gz' @@ -94,9 +88,8 @@ def testPersistenceCompressed(self): model2 = tfidfmodel.TfidfModel.load(fname, mmap=None) self.assertTrue(model.idfs == model2.idfs) tstvec = [] - self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector -#endclass TestTfidfModel - + self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector +# endclass TestTfidfModel if __name__ == '__main__': diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py index 612d55dd68..ae94b5c2ad 100644 --- a/gensim/test/test_utils.py +++ b/gensim/test/test_utils.py @@ -83,14 +83,15 @@ def test_decode_entities(self): expected = u'It\x92s the Year of the Horse. YES VIN DIESEL \U0001f64c \U0001f4af' self.assertEquals(utils.decode_htmlentities(body), expected) + class TestSampleDict(unittest.TestCase): def test_sample_dict(self): - d = {1:2,2:3,3:4,4:5} - expected_dict = [(1,2),(2,3)] - expected_dict_random = [(k,v) for k,v in iteritems(d)] - sampled_dict = utils.sample_dict(d,2,False) - self.assertEqual(sampled_dict,expected_dict) - sampled_dict_random = utils.sample_dict(d,2) + d = {1: 2, 2: 3, 3: 4, 4: 5} + expected_dict = [(1, 2), (2, 3)] + expected_dict_random = [(k, v) for k, v in iteritems(d)] + sampled_dict = utils.sample_dict(d, 2, False) + self.assertEqual(sampled_dict, expected_dict) + sampled_dict_random = utils.sample_dict(d, 2) if sampled_dict_random in expected_dict_random: self.assertTrue(True) diff --git a/gensim/test/test_varembed_wrapper.py b/gensim/test/test_varembed_wrapper.py index 5b83bbf50b..2053f7ffc3 100644 --- a/gensim/test/test_varembed_wrapper.py +++ b/gensim/test/test_varembed_wrapper.py @@ -59,6 +59,7 @@ def testAddMorphemesToEmbeddings(self): self.model_sanity(model_with_morphemes) # Check syn0 is different for both models. 
self.assertFalse(np.allclose(model.syn0, model_with_morphemes.syn0)) + def testLookup(self): """Test lookup of vector for a particular word and list""" model = varembed.VarEmbed.load_varembed_format(vectors=varembed_model_vector_file) diff --git a/gensim/test/test_wikicorpus.py b/gensim/test/test_wikicorpus.py index d8b8b721c9..9bdbcbdb8d 100644 --- a/gensim/test/test_wikicorpus.py +++ b/gensim/test/test_wikicorpus.py @@ -10,8 +10,6 @@ import os -import sys -import types import logging import unittest @@ -49,9 +47,9 @@ def test_first_element(self): """ wc = WikiCorpus(datapath(FILENAME), processes=1) - l = wc.get_texts() - self.assertTrue(u'anarchism' in next(l)) - self.assertTrue(u'autism' in next(l)) + texts = wc.get_texts() + self.assertTrue(u'anarchism' in next(texts)) + self.assertTrue(u'autism' in next(texts)) def test_unicode_element(self): """ @@ -60,8 +58,8 @@ def test_unicode_element(self): """ wc = WikiCorpus(datapath(FILENAME_U), processes=1) - l = wc.get_texts() - self.assertTrue(u'папа' in next(l)) + texts = wc.get_texts() + self.assertTrue(u'папа' in next(texts)) if __name__ == '__main__': diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index bd8b9c7668..29ae713b90 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -13,25 +13,22 @@ import unittest import os import tempfile -import itertools import bz2 import sys import numpy as np -from gensim import utils, matutils -from gensim.utils import check_output -from subprocess import PIPE +from gensim import utils from gensim.models import word2vec, keyedvectors from testfixtures import log_capture try: - from pyemd import emd + from pyemd import emd # noqa:F401 PYEMD_EXT = True except ImportError: PYEMD_EXT = False -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -41,6 +38,7 @@ def __iter__(self): for line in f: yield utils.simple_preprocess(line) + list_corpus = list(LeeCorpus()) sentences = [ @@ -64,22 +62,27 @@ def __iter__(self): ['artificial', 'intelligence', 'system'] ] + def testfile(): # temporary data will be stored to this file return os.path.join(tempfile.gettempdir(), 'gensim_word2vec.tst') + def _rule(word, count, min_count): if word == "human": return utils.RULE_DISCARD # throw out else: return utils.RULE_DEFAULT # apply default rule, i.e. 
min_count + + def load_on_instance(): # Save and load a Word2Vec Model on instance for test model = word2vec.Word2Vec(sentences, min_count=1) model.save(testfile()) - model = word2vec.Word2Vec() # should fail at this point + model = word2vec.Word2Vec() # should fail at this point return model.load(testfile()) + class TestWord2VecModel(unittest.TestCase): def testOnlineLearning(self): """Test that the algorithm is able to add new words to the @@ -106,7 +109,6 @@ def testOnlineLearningAfterSave(self): model_neg.train(new_sentences, total_examples=model_neg.corpus_count, epochs=model_neg.iter) self.assertEqual(len(model_neg.wv.vocab), 14) - def onlineSanity(self, model): terro, others = [], [] for l in list_corpus: @@ -201,7 +203,7 @@ def testSyn0NormNotSaved(self): def testLoadPreKeyedVectorModel(self): """Test loading pre-KeyedVectors word2vec model""" - if sys.version_info[:2] == (3,4): + if sys.version_info[:2] == (3, 4): model_file_suffix = '_py3_4' elif sys.version_info < (3,): model_file_suffix = '_py2' @@ -251,7 +253,6 @@ def testNoTrainingCFormat(self): binary_model.wv = kv self.assertRaises(ValueError, binary_model.train, sentences) - def testTooShortBinaryWord2VecFormat(self): tfile = testfile() model = word2vec.Word2Vec(sentences, min_count=1) @@ -303,7 +304,6 @@ def testPersistenceKeyedVectorsFormatWithVocab(self): kv_binary_model_with_vocab = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), testvocab, binary=True) self.assertEqual(model.wv.vocab['human'].count, kv_binary_model_with_vocab.vocab['human'].count) - def testPersistenceWord2VecFormatCombinationWithStandardPersistence(self): """Test storing/loading the entire model and vocabulary in word2vec format chained with saving and loading via `save` and `load` methods`. @@ -316,7 +316,6 @@ def testPersistenceWord2VecFormatCombinationWithStandardPersistence(self): binary_model_with_vocab_kv.save(testfile()) self.assertRaises(AttributeError, word2vec.Word2Vec.load, testfile()) - def testLargeMmap(self): """Test storing/loading the entire model.""" model = word2vec.Word2Vec(sentences, min_count=1) @@ -352,7 +351,7 @@ def testVocab(self): self.assertRaises(RuntimeError, word2vec.Word2Vec, []) # input not empty, but rather completely filtered out - self.assertRaises(RuntimeError, word2vec.Word2Vec, corpus, min_count=total_words+1) + self.assertRaises(RuntimeError, word2vec.Word2Vec, corpus, min_count=total_words + 1) def testTraining(self): """Test word2vec training.""" @@ -573,7 +572,7 @@ def testParallel(self): for workers in [2, 4]: model = word2vec.Word2Vec(corpus, workers=workers) - sims = model.most_similar('israeli') + sims = model.most_similar('israeli') # noqa:F841 # the exact vectors and therefore similarities may differ, due to different thread collisions/randomization # so let's test only for top3 # TODO: commented out for now; find a more robust way to compare against "gold standard" @@ -622,16 +621,16 @@ def testNormalizeAfterTrainingData(self): def testPredictOutputWord(self): '''Test word2vec predict_output_word method handling for negative sampling scheme''' - #under normal circumstances + # under normal circumstances model_with_neg = word2vec.Word2Vec(sentences, min_count=1) predictions_with_neg = model_with_neg.predict_output_word(['system', 'human'], topn=5) - self.assertTrue(len(predictions_with_neg)==5) + self.assertTrue(len(predictions_with_neg) == 5) - #out-of-vobaculary scenario + # out-of-vobaculary scenario predictions_out_of_vocab = model_with_neg.predict_output_word(['some', 'random', 'words'], 
topn=5) self.assertEqual(predictions_out_of_vocab, None) - #when required model parameters have been deleted + # when required model parameters have been deleted model_with_neg.init_sims() model_with_neg.wv.save_word2vec_format(testfile(), binary=True) kv_model_with_neg = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True) @@ -639,7 +638,7 @@ def testPredictOutputWord(self): binary_model_with_neg.wv = kv_model_with_neg self.assertRaises(RuntimeError, binary_model_with_neg.predict_output_word, ['system', 'human']) - #negative sampling scheme not used + # negative sampling scheme not used model_without_neg = word2vec.Word2Vec(sentences, min_count=1, negative=0) self.assertRaises(RuntimeError, model_without_neg.predict_output_word, ['system', 'human']) @@ -679,6 +678,7 @@ def test_train_with_explicit_param(self): with self.assertRaises(ValueError): model.train(sentences) + def test_sentences_should_not_be_a_generator(self): """ Is sentences a generator object? @@ -706,7 +706,7 @@ def test_compute_training_loss(self): self.assertTrue(training_loss_val > 0.0) -#endclass TestWord2VecModel +# endclass TestWord2VecModel class TestWMD(unittest.TestCase): def testNonzero(self): @@ -791,7 +791,7 @@ def testPathLineSentencesOneFile(self): self.assertEqual(words, utils.to_unicode(orig.readline()).split()) -#endclass TestWord2VecSentenceIterators +# endclass TestWord2VecSentenceIterators # TODO: get correct path to Python binary # class TestWord2VecScripts(unittest.TestCase): diff --git a/gensim/test/test_wordrank_wrapper.py b/gensim/test/test_wordrank_wrapper.py index 5b8260fdf2..8f8d5b5f9d 100644 --- a/gensim/test/test_wordrank_wrapper.py +++ b/gensim/test/test_wordrank_wrapper.py @@ -18,13 +18,15 @@ from gensim.models.wrappers import wordrank -module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + def testfile(): # temporary model will be stored to this file return os.path.join(tempfile.gettempdir(), 'gensim_wordrank.test') + class TestWordrank(unittest.TestCase): def setUp(self): wr_home = os.environ.get('WR_HOME', None) @@ -42,7 +44,7 @@ def testLoadWordrankFormat(self): vocab_size, dim = 76, 50 self.assertEqual(model.syn0.shape, (vocab_size, dim)) self.assertEqual(len(model.vocab), vocab_size) - os.remove(self.wr_file+'.w2vformat') + os.remove(self.wr_file + '.w2vformat') def testEnsemble(self): """Test ensemble of two embeddings""" @@ -50,7 +52,7 @@ def testEnsemble(self): return new_emb = self.test_model.ensemble_embedding(self.wr_file, self.wr_file) self.assertEqual(new_emb.shape, (76, 50)) - os.remove(self.wr_file+'.w2vformat') + os.remove(self.wr_file + '.w2vformat') def testPersistence(self): """Test storing/loading the entire model""" @@ -77,8 +79,7 @@ def models_equal(self, model, model2): self.assertEqual(set(model.vocab.keys()), set(model2.vocab.keys())) self.assertTrue(numpy.allclose(model.syn0, model2.syn0)) + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() - - \ No newline at end of file diff --git a/gensim/topic_coherence/aggregation.py b/gensim/topic_coherence/aggregation.py index 341834c92f..065943a28f 100644 --- a/gensim/topic_coherence/aggregation.py +++ b/gensim/topic_coherence/aggregation.py @@ -14,6 +14,7 @@ logger = logging.getLogger(__name__) 
+ def arithmetic_mean(confirmed_measures): """ This functoin performs the arithmetic mean aggregation on the output obtained from diff --git a/gensim/topic_coherence/segmentation.py b/gensim/topic_coherence/segmentation.py index 4845a26859..9097036914 100644 --- a/gensim/topic_coherence/segmentation.py +++ b/gensim/topic_coherence/segmentation.py @@ -9,10 +9,10 @@ """ import logging -import numpy as np logger = logging.getLogger(__name__) + def s_one_pre(topics): """ This function performs s_one_pre segmentation on a list of topics. @@ -40,6 +40,7 @@ def s_one_pre(topics): return s_one_pre + def s_one_one(topics): """ This function performs s_one_one segmentation on a list of topics. @@ -70,6 +71,7 @@ def s_one_one(topics): return s_one_one + def s_one_set(topics): """ This function performs s_one_set segmentation on a list of topics. diff --git a/gensim/topic_coherence/text_analysis.py b/gensim/topic_coherence/text_analysis.py index 351f15a932..7305fe9792 100644 --- a/gensim/topic_coherence/text_analysis.py +++ b/gensim/topic_coherence/text_analysis.py @@ -438,7 +438,7 @@ def run(self): logger.info( "%s interrupted after processing %d documents", self.__class__.__name__, self.accumulator.num_docs) - except: + except Exception: logger.exception("worker encountered unexpected exception") finally: self.reply_to_master() diff --git a/gensim/utils.py b/gensim/utils.py index cb7a204511..47d7bc98cd 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -13,8 +13,6 @@ import logging import warnings -logger = logging.getLogger(__name__) - try: from html.entities import name2codepoint as n2cp except ImportError: @@ -41,11 +39,13 @@ import numbers import scipy.sparse +from six import iterkeys, iteritems, u, string_types, unichr +from six.moves import xrange + if sys.version_info[0] >= 3: unicode = str -from six import iterkeys, iteritems, u, string_types, unichr -from six.moves import xrange +logger = logging.getLogger(__name__) try: from smart_open import smart_open @@ -129,6 +129,8 @@ def __enter__(self): def __exit__(self, type, value, traceback): pass + + nocm = NoCM() @@ -230,6 +232,8 @@ def any2utf8(text, errors='strict', encoding='utf8'): return text.encode('utf8') # do bytestring -> unicode -> utf8 full circle, to ensure valid utf8 return unicode(text, encoding, errors=errors).encode('utf8') + + to_utf8 = any2utf8 @@ -238,6 +242,8 @@ def any2unicode(text, encoding='utf8', errors='strict'): if isinstance(text, unicode): return text return unicode(text, encoding, errors=errors) + + to_unicode = any2unicode @@ -462,7 +468,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, self.__dict__['__scipys'] = scipys self.__dict__['__ignoreds'] = ignoreds self.__dict__['__recursive_saveloads'] = recursive_saveloads - except: + except Exception: # restore the attributes if exception-interrupted for attrib, val in iteritems(asides): setattr(self, attrib, val) @@ -502,7 +508,7 @@ def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, except TypeError: # `fname_or_handle` does not have write attribute self._smart_save(fname_or_handle, separately, sep_limit, ignore, pickle_protocol=pickle_protocol) -#endclass SaveLoad +# endclass SaveLoad def identity(p): @@ -532,6 +538,7 @@ class FakeDict(object): is a waste of memory. 
""" + def __init__(self, num_terms): self.num_terms = num_terms @@ -599,7 +606,7 @@ def is_corpus(obj): try: if 'Corpus' in obj.__class__.__name__: # the most common case, quick hack return True, obj - except: + except Exception: pass try: if hasattr(obj, 'next') or hasattr(obj, '__next__'): @@ -637,14 +644,14 @@ def get_my_ip(): s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.connect((ns._pyroUri.host, ns._pyroUri.port)) result, port = s.getsockname() - except: + except Exception: try: # see what ifconfig says about our default interface import commands result = commands.getoutput("ifconfig").split("\n")[1].split()[1][5:] if len(result.split('.')) != 4: raise Exception() - except: + except Exception: # give up, leave the resolution to gethostbyname result = socket.gethostbyname(socket.gethostname()) return result @@ -655,6 +662,7 @@ class RepeatCorpus(SaveLoad): Used in the tutorial on distributed computing and likely not useful anywhere else. """ + def __init__(self, corpus, reps): """ Wrap a `corpus` as another corpus of length `reps`. This is achieved by @@ -794,7 +802,7 @@ def substitute_entity(match): return safe_unichr(cp) else: return match.group() - except: + except Exception: # in case of errors, return original input return match.group() @@ -823,6 +831,7 @@ def chunkize_serial(iterable, chunksize, as_numpy=False): # memory opt: wrap the chunk and then pop(), to avoid leaving behind a dangling reference yield wrapped_chunk.pop() + grouper = chunkize_serial @@ -858,7 +867,7 @@ def run(self): logger.debug("prepared another chunk of %i documents (qsize=%s)" % (len(wrapped_chunk[0]), qsize)) self.q.put(wrapped_chunk.pop(), block=True) -#endclass InputQueue +# endclass InputQueue if os.name == 'nt': @@ -1039,7 +1048,7 @@ def has_pattern(): Function which returns a flag indicating whether pattern is installed or not """ try: - from pattern.en import parse + from pattern.en import parse # noqa:F401 return True except ImportError: return False @@ -1142,6 +1151,7 @@ def qsize(queue): # OS X doesn't support qsize return -1 + RULE_DEFAULT = 0 RULE_DISCARD = 1 RULE_KEEP = 2 diff --git a/setup.py b/setup.py index 6097e34c5a..afeff174cd 100644 --- a/setup.py +++ b/setup.py @@ -13,21 +13,22 @@ import os import sys import warnings -import io - -if sys.version_info[:2] < (2, 7) or (sys.version_info[:1] == 3 and sys.version_info[:2] < (3, 5)): - raise Exception('This version of gensim needs Python 2.7, 3.5 or later.') import ez_setup -ez_setup.use_setuptools() from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext +if sys.version_info[:2] < (2, 7) or (sys.version_info[:1] == 3 and sys.version_info[:2] < (3, 5)): + raise Exception('This version of gensim needs Python 2.7, 3.5 or later.') + +ez_setup.use_setuptools() # the following code is adapted from tornado's setup.py: # https://github.com/tornadoweb/tornado/blob/master/setup.py # to support installing without the extension on platforms where # no compiler is available. + + class custom_build_ext(build_ext): """Allow C extension building to fail. @@ -89,17 +90,16 @@ def build_extension(self, ext): # importing numpy directly in this script, before it's actually installed! 
# http://stackoverflow.com/questions/19919905/how-to-bootstrap-numpy-installation-in-setup-py def finalize_options(self): - build_ext.finalize_options(self) - # Prevent numpy from thinking it is still in its setup process: - # https://docs.python.org/2/library/__builtin__.html#module-__builtin__ - if isinstance(__builtins__, dict): - __builtins__["__NUMPY_SETUP__"] = False - else: - __builtins__.__NUMPY_SETUP__ = False - - import numpy - self.include_dirs.append(numpy.get_include()) + build_ext.finalize_options(self) + # Prevent numpy from thinking it is still in its setup process: + # https://docs.python.org/2/library/__builtin__.html#module-__builtin__ + if isinstance(__builtins__, dict): + __builtins__["__NUMPY_SETUP__"] = False + else: + __builtins__.__NUMPY_SETUP__ = False + import numpy + self.include_dirs.append(numpy.get_include()) model_dir = os.path.join(os.path.dirname(__file__), 'gensim', 'models') @@ -113,7 +113,6 @@ def finalize_options(self): cmdclass.update(vars(wheelhouse_uploader.cmd)) - LONG_DESCRIPTION = u""" ============================================== gensim -- Topic Modelling in Python