diff --git a/continuous_integration/travis/flake8_diff.sh b/continuous_integration/travis/flake8_diff.sh index 2f04ab3e31..37d12de61d 100755 --- a/continuous_integration/travis/flake8_diff.sh +++ b/continuous_integration/travis/flake8_diff.sh @@ -19,18 +19,18 @@ set -e set -o pipefail PROJECT=RaRe-Technologies/gensim -PROJECT_URL=https://github.com/$PROJECT.git +PROJECT_URL=https://github.com/${PROJECT}.git # Find the remote with the project name (upstream in most cases) -REMOTE=$(git remote -v | grep $PROJECT | cut -f1 | head -1 || echo '') +REMOTE=$(git remote -v | grep ${PROJECT} | cut -f1 | head -1 || echo '') # Add a temporary remote if needed. For example this is necessary when # Travis is configured to run in a fork. In this case 'origin' is the # fork and not the reference repo we want to diff against. if [[ -z "$REMOTE" ]]; then TMP_REMOTE=tmp_reference_upstream - REMOTE=$TMP_REMOTE - git remote add $REMOTE $PROJECT_URL + REMOTE=${TMP_REMOTE} + git remote add ${REMOTE} ${PROJECT_URL} fi echo "Remotes:" @@ -56,15 +56,15 @@ if [[ "$TRAVIS" == "true" ]]; then echo "New branch, no commit range from Travis so passing this test by convention" exit 0 fi - COMMIT_RANGE=$TRAVIS_COMMIT_RANGE + COMMIT_RANGE=${TRAVIS_COMMIT_RANGE} fi else # We want to fetch the code as it is in the PR branch and not # the result of the merge into develop. This way line numbers # reported by Travis will match with the local code. - LOCAL_BRANCH_REF=travis_pr_$TRAVIS_PULL_REQUEST + LOCAL_BRANCH_REF=travis_pr_${TRAVIS_PULL_REQUEST} # In Travis the PR target is always origin - git fetch origin pull/$TRAVIS_PULL_REQUEST/head:refs/$LOCAL_BRANCH_REF + git fetch origin pull/${TRAVIS_PULL_REQUEST}/head:refs/${LOCAL_BRANCH_REF} fi fi @@ -76,34 +76,34 @@ if [[ -z "$COMMIT_RANGE" ]]; then fi echo -e "\nLast 2 commits in $LOCAL_BRANCH_REF:" echo '--------------------------------------------------------------------------------' - git log -2 $LOCAL_BRANCH_REF + git log -2 ${LOCAL_BRANCH_REF} REMOTE_MASTER_REF="$REMOTE/develop" # Make sure that $REMOTE_MASTER_REF is a valid reference echo -e "\nFetching $REMOTE_MASTER_REF" echo '--------------------------------------------------------------------------------' - git fetch $REMOTE develop:refs/remotes/$REMOTE_MASTER_REF - LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short $LOCAL_BRANCH_REF) - REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short $REMOTE_MASTER_REF) + git fetch ${REMOTE} develop:refs/remotes/${REMOTE_MASTER_REF} + LOCAL_BRANCH_SHORT_HASH=$(git rev-parse --short ${LOCAL_BRANCH_REF}) + REMOTE_MASTER_SHORT_HASH=$(git rev-parse --short ${REMOTE_MASTER_REF}) - COMMIT=$(git merge-base $LOCAL_BRANCH_REF $REMOTE_MASTER_REF) || \ - echo "No common ancestor found for $(git show $LOCAL_BRANCH_REF -q) and $(git show $REMOTE_MASTER_REF -q)" + COMMIT=$(git merge-base ${LOCAL_BRANCH_REF} ${REMOTE_MASTER_REF}) || \ + echo "No common ancestor found for $(git show ${LOCAL_BRANCH_REF} -q) and $(git show ${REMOTE_MASTER_REF} -q)" if [ -z "$COMMIT" ]; then exit 1 fi - COMMIT_SHORT_HASH=$(git rev-parse --short $COMMIT) + COMMIT_SHORT_HASH=$(git rev-parse --short ${COMMIT}) echo -e "\nCommon ancestor between $LOCAL_BRANCH_REF ($LOCAL_BRANCH_SHORT_HASH)"\ "and $REMOTE_MASTER_REF ($REMOTE_MASTER_SHORT_HASH) is $COMMIT_SHORT_HASH:" echo '--------------------------------------------------------------------------------' - git show --no-patch $COMMIT_SHORT_HASH + git show --no-patch ${COMMIT_SHORT_HASH} COMMIT_RANGE="$COMMIT_SHORT_HASH..$LOCAL_BRANCH_SHORT_HASH" if [[ -n "$TMP_REMOTE" ]]; then - 
git remote remove $TMP_REMOTE + git remote remove ${TMP_REMOTE} fi else @@ -111,19 +111,19 @@ else fi echo -e '\nRunning flake8 on the diff in the range' "$COMMIT_RANGE" \ - "($(git rev-list $COMMIT_RANGE | wc -l) commit(s)):" + "($(git rev-list ${COMMIT_RANGE} | wc -l) commit(s)):" echo '--------------------------------------------------------------------------------' # We ignore files from sklearn/externals. # Excluding vec files since they contain non-utf8 content and flake8 raises exception for non-utf8 input # We need the following command to exit with 0 hence the echo in case # there is no match -MODIFIED_PY_FILES="$(git diff --name-only $COMMIT_RANGE | grep '[a-zA-Z0-9]*.py$' || echo "no_match")" -MODIFIED_IPYNB_FILES="$(git diff --name-only $COMMIT_RANGE | grep '[a-zA-Z0-9]*.ipynb$' || echo "no_match")" +MODIFIED_PY_FILES="$(git diff --name-only ${COMMIT_RANGE} | grep '[a-zA-Z0-9]*.py$' || echo "no_match")" +MODIFIED_IPYNB_FILES="$(git diff --name-only ${COMMIT_RANGE} | grep '[a-zA-Z0-9]*.ipynb$' || echo "no_match")" -echo "*.py files: " $MODIFIED_PY_FILES -echo "*.ipynb files: " $MODIFIED_IPYNB_FILES +echo "*.py files: " ${MODIFIED_PY_FILES} +echo "*.ipynb files: " ${MODIFIED_IPYNB_FILES} check_files() { @@ -133,7 +133,7 @@ check_files() { if [ -n "$files" ]; then # Conservative approach: diff without context (--unified=0) so that code # that was not changed does not create failures - git diff --unified=0 $COMMIT_RANGE -- $files | flake8 --diff --show-source $options + git diff --unified=0 ${COMMIT_RANGE} -- ${files} | flake8 --diff --show-source ${options} fi } @@ -150,6 +150,6 @@ else for fname in ${MODIFIED_IPYNB_FILES} do echo "File: $fname" - jupyter nbconvert --to script --stdout $fname | flake8 - --show-source --ignore=E501,E731,E12,W503,E402 --builtins=get_ipython || true + jupyter nbconvert --to script --stdout ${fname} | flake8 - --show-source --ignore=E501,E731,E12,W503,E402 --builtins=get_ipython || true done fi diff --git a/continuous_integration/travis/install.sh b/continuous_integration/travis/install.sh index 1ba1796a4b..c14ac86925 100755 --- a/continuous_integration/travis/install.sh +++ b/continuous_integration/travis/install.sh @@ -9,5 +9,5 @@ export PATH=/home/travis/miniconda2/bin:$PATH conda update --yes conda -conda create --yes -n gensim-test python=$PYTHON_VERSION pip atlas flake8 jupyter numpy==$NUMPY_VERSION scipy==$SCIPY_VERSION && source activate gensim-test +conda create --yes -n gensim-test python=${PYTHON_VERSION} pip atlas flake8 jupyter numpy==${NUMPY_VERSION} scipy==${SCIPY_VERSION} && source activate gensim-test pip install . 
&& pip install .[test] diff --git a/docker/start_jupyter_notebook.sh b/docker/start_jupyter_notebook.sh index 4c5946d056..7893536dd6 100644 --- a/docker/start_jupyter_notebook.sh +++ b/docker/start_jupyter_notebook.sh @@ -4,4 +4,4 @@ PORT=$1 NOTEBOOK_DIR=/gensim/docs/notebooks DEFAULT_URL=/notebooks/gensim%20Quick%20Start.ipynb -jupyter notebook --no-browser --ip=* --port=$PORT --allow-root --notebook-dir=$NOTEBOOK_DIR --NotebookApp.token=\"\" --NotebookApp.default_url=$DEFAULT_URL \ No newline at end of file +jupyter notebook --no-browser --ip=* --port=${PORT} --allow-root --notebook-dir=${NOTEBOOK_DIR} --NotebookApp.token=\"\" --NotebookApp.default_url=${DEFAULT_URL} diff --git a/docs/notebooks/test_notebooks.py b/docs/notebooks/test_notebooks.py index 05dd7082f5..77633b7037 100644 --- a/docs/notebooks/test_notebooks.py +++ b/docs/notebooks/test_notebooks.py @@ -28,16 +28,11 @@ def _notebook_run(path): print(str(e.traceback).split("\n")[-2]) else: raise e - except TimeoutError as e: + except RuntimeError as e: print(e) finally: nbformat.write(nb, fout) - #nb = nbformat.read(fout, nbformat.current_nbformat) - - #errors = errors.extend( - #[output for cell in nb.cells if "outputs" in cell - # for output in cell["outputs"] if output.output_type == "error"]) return nb, errors diff --git a/docs/src/conf.py b/docs/src/conf.py index 622db5fec7..b1557c906c 100644 --- a/docs/src/conf.py +++ b/docs/src/conf.py @@ -11,7 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import os +import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -34,7 +35,7 @@ source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8' +# source_encoding = 'utf-8' # The master toctree document. master_doc = 'indextoc' @@ -58,67 +59,67 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. -#unused_docs = [] +# unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. 
Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. -#html_theme = 'default' +# html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#main_colour = "#ffbbbb" +# main_colour = "#ffbbbb" html_theme_options = { -#"rightsidebar": "false", -#"stickysidebar": "true", -#"bodyfont": "'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', 'Verdana', 'sans-serif'", -#"headfont": "'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', 'Verdana', 'sans-serif'", -#"sidebarbgcolor": "fuckyou", -#"footerbgcolor": "#771111", -#"relbarbgcolor": "#993333", -#"sidebartextcolor": "#000000", -#"sidebarlinkcolor": "#330000", -#"codebgcolor": "#fffff0", -#"headtextcolor": "#000080", -#"headbgcolor": "#f0f0ff", -#"bgcolor": "#ffffff", +# "rightsidebar": "false", +# "stickysidebar": "true", +# "bodyfont": "'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', 'Verdana', 'sans-serif'", +# "headfont": "'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', 'Verdana', 'sans-serif'", +# "sidebarbgcolor": "fuckyou", +# "footerbgcolor": "#771111", +# "relbarbgcolor": "#993333", +# "sidebartextcolor": "#000000", +# "sidebarlinkcolor": "#330000", +# "codebgcolor": "#fffff0", +# "headtextcolor": "#000080", +# "headbgcolor": "#f0f0ff", +# "bgcolor": "#ffffff", } @@ -130,11 +131,11 @@ html_title = "gensim" # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = '' +# html_short_title = '' # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 @@ -152,17 +153,17 @@ # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -html_sidebars = {} #{'index': ['download.html', 'globaltoc.html', 'searchbox.html', 'indexsidebar.html']} -#html_sidebars = {'index': ['globaltoc.html', 'searchbox.html']} +html_sidebars = {} # {'index': ['download.html', 'globaltoc.html', 'searchbox.html', 'indexsidebar.html']} +# html_sidebars = {'index': ['globaltoc.html', 'searchbox.html']} # If false, no module index is generated. -#html_use_modindex = True +# html_use_modindex = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. html_split_index = False @@ -175,10 +176,10 @@ # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' +# html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'gensimdoc' @@ -188,32 +189,30 @@ # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' +# latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' +# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. 
List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'gensim.tex', u'gensim Documentation', u'Radim Řehůřek', 'manual'), -] +latex_documents = [('index', 'gensim.tex', u'gensim Documentation', u'Radim Řehůřek', 'manual')] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. latex_use_parts = False # Additional stuff for the LaTeX preamble. -#latex_preamble = '' +# latex_preamble = '' # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_use_modindex = True +# latex_use_modindex = True suppress_warnings = ['image.nonlocal_uri', 'ref.citation', 'ref.footnote'] diff --git a/gensim/__init__.py b/gensim/__init__.py index c267afe4de..3923460991 100644 --- a/gensim/__init__.py +++ b/gensim/__init__.py @@ -3,7 +3,7 @@ similarities within a corpus of documents. """ -from gensim import parsing, matutils, interfaces, corpora, models, similarities, summarization # noqa:F401 +from gensim import parsing, matutils, interfaces, corpora, models, similarities, summarization, utils # noqa:F401 import logging __version__ = '2.3.0' diff --git a/gensim/corpora/bleicorpus.py b/gensim/corpora/bleicorpus.py index 327a36fc14..6bd96da716 100644 --- a/gensim/corpora/bleicorpus.py +++ b/gensim/corpora/bleicorpus.py @@ -45,7 +45,7 @@ def __init__(self, fname, fname_vocab=None): `fname.vocab`. """ IndexedCorpus.__init__(self, fname) - logger.info("loading corpus from %s" % fname) + logger.info("loading corpus from %s", fname) if fname_vocab is None: fname_base, _ = path.splitext(fname) @@ -102,7 +102,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): else: num_terms = 1 + max([-1] + id2word.keys()) - logger.info("storing corpus in Blei's LDA-C format into %s" % fname) + logger.info("storing corpus in Blei's LDA-C format into %s", fname) with utils.smart_open(fname, 'wb') as fout: offsets = [] for doc in corpus: @@ -113,7 +113,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): # write out vocabulary, in a format compatible with Blei's topics.py script fname_vocab = utils.smart_extension(fname, '.vocab') - logger.info("saving vocabulary of %i words to %s" % (num_terms, fname_vocab)) + logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab) with utils.smart_open(fname_vocab, 'wb') as fout: for featureid in xrange(num_terms): fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---'))) @@ -127,5 +127,3 @@ def docbyoffset(self, offset): with utils.smart_open(self.fname) as f: f.seek(offset) return self.line2doc(f.readline()) - -# endclass BleiCorpus diff --git a/gensim/corpora/csvcorpus.py b/gensim/corpora/csvcorpus.py index 6d3288bc0f..969437e571 100644 --- a/gensim/corpora/csvcorpus.py +++ b/gensim/corpora/csvcorpus.py @@ -36,7 +36,7 @@ def __init__(self, fname, labels): `labels` = are class labels present in the input file? 
=> skip the first column """ - logger.info("loading corpus from %s" % fname) + logger.info("loading corpus from %s", fname) self.fname = fname self.length = None self.labels = labels @@ -45,7 +45,7 @@ def __init__(self, fname, labels): head = ''.join(itertools.islice(utils.smart_open(self.fname), 5)) self.headers = csv.Sniffer().has_header(head) self.dialect = csv.Sniffer().sniff(head) - logger.info("sniffed CSV delimiter=%r, headers=%s" % (self.dialect.delimiter, self.headers)) + logger.info("sniffed CSV delimiter=%r, headers=%s", self.dialect.delimiter, self.headers) def __iter__(self): """ @@ -60,8 +60,6 @@ def __iter__(self): for line_no, line in enumerate(reader): if self.labels: line.pop(0) # ignore the first column = class label - yield list(enumerate(map(float, line))) + yield list(enumerate(float(x) for x in line)) self.length = line_no + 1 # store the total number of CSV rows = documents - -# endclass CsvCorpus diff --git a/gensim/corpora/dictionary.py b/gensim/corpora/dictionary.py index d32276688b..08c4097f03 100644 --- a/gensim/corpora/dictionary.py +++ b/gensim/corpora/dictionary.py @@ -61,7 +61,7 @@ def __getitem__(self, tokenid): if len(self.id2token) != len(self.token2id): # the word->id mapping has changed (presumably via add_documents); # recompute id->word accordingly - self.id2token = dict((v, k) for k, v in iteritems(self.token2id)) + self.id2token = utils.revdict(self.token2id) return self.id2token[tokenid] # will throw for non-existent ids def __iter__(self): @@ -120,7 +120,8 @@ def add_documents(self, documents, prune_at=2000000): logger.info( "built %s from %i documents (total %i corpus positions)", - self, self.num_docs, self.num_pos) + self, self.num_docs, self.num_pos + ) def doc2bow(self, document, allow_update=False, return_missing=False): """ @@ -147,14 +148,14 @@ def doc2bow(self, document, allow_update=False, return_missing=False): token2id = self.token2id if allow_update or return_missing: - missing = dict((w, freq) for w, freq in iteritems(counter) if w not in token2id) + missing = {w: freq for w, freq in iteritems(counter) if w not in token2id} if allow_update: for w in missing: # new id = number of ids made so far; # NOTE this assumes there are no gaps in the id sequence! 
token2id[w] = len(token2id) - result = dict((token2id[w], freq) for w, freq in iteritems(counter) if w in token2id) + result = {token2id[w]: freq for w, freq in iteritems(counter) if w in token2id} if allow_update: self.num_docs += 1 @@ -201,15 +202,17 @@ def filter_extremes(self, no_below=5, no_above=0.5, keep_n=100000, keep_tokens=N else: good_ids = ( v for v in itervalues(self.token2id) - if no_below <= self.dfs.get(v, 0) <= no_above_abs) + if no_below <= self.dfs.get(v, 0) <= no_above_abs + ) good_ids = sorted(good_ids, key=self.dfs.get, reverse=True) if keep_n is not None: good_ids = good_ids[:keep_n] - bad_words = [(self[id], self.dfs.get(id, 0)) for id in set(self).difference(good_ids)] + bad_words = [(self[idx], self.dfs.get(idx, 0)) for idx in set(self).difference(good_ids)] logger.info("discarding %i tokens: %s...", len(self) - len(good_ids), bad_words[:10]) logger.info( "keeping %i tokens which were in no less than %i and no more than %i (=%.1f%%) documents", - len(good_ids), no_below, no_above_abs, 100.0 * no_above) + len(good_ids), no_below, no_above_abs, 100.0 * no_above + ) # do the actual filtering, then rebuild dictionary to remove gaps in ids self.filter_tokens(good_ids=good_ids) @@ -229,11 +232,11 @@ def filter_n_most_frequent(self, remove_n): most_frequent_ids = sorted(most_frequent_ids, key=self.dfs.get, reverse=True) most_frequent_ids = most_frequent_ids[:remove_n] # do the actual filtering, then rebuild dictionary to remove gaps in ids - most_frequent_words = [(self[id], self.dfs.get(id, 0)) for id in most_frequent_ids] + most_frequent_words = [(self[idx], self.dfs.get(idx, 0)) for idx in most_frequent_ids] logger.info("discarding %i tokens: %s...", len(most_frequent_ids), most_frequent_words[:10]) self.filter_tokens(bad_ids=most_frequent_ids) - logger.info("resulting dictionary: %s" % self) + logger.info("resulting dictionary: %s", self) def filter_tokens(self, bad_ids=None, good_ids=None): """ @@ -244,20 +247,12 @@ def filter_tokens(self, bad_ids=None, good_ids=None): """ if bad_ids is not None: bad_ids = set(bad_ids) - self.token2id = dict((token, tokenid) - for token, tokenid in iteritems(self.token2id) - if tokenid not in bad_ids) - self.dfs = dict((tokenid, freq) - for tokenid, freq in iteritems(self.dfs) - if tokenid not in bad_ids) + self.token2id = {token: tokenid for token, tokenid in iteritems(self.token2id) if tokenid not in bad_ids} + self.dfs = {tokenid: freq for tokenid, freq in iteritems(self.dfs) if tokenid not in bad_ids} if good_ids is not None: good_ids = set(good_ids) - self.token2id = dict((token, tokenid) - for token, tokenid in iteritems(self.token2id) - if tokenid in good_ids) - self.dfs = dict((tokenid, freq) - for tokenid, freq in iteritems(self.dfs) - if tokenid in good_ids) + self.token2id = {token: tokenid for token, tokenid in iteritems(self.token2id) if tokenid in good_ids} + self.dfs = {tokenid: freq for tokenid, freq in iteritems(self.dfs) if tokenid in good_ids} self.compactify() def compactify(self): @@ -274,9 +269,9 @@ def compactify(self): idmap = dict(izip(itervalues(self.token2id), xrange(len(self.token2id)))) # reassign mappings to new ids - self.token2id = dict((token, idmap[tokenid]) for token, tokenid in iteritems(self.token2id)) + self.token2id = {token: idmap[tokenid] for token, tokenid in iteritems(self.token2id)} self.id2token = {} - self.dfs = dict((idmap[tokenid], freq) for tokenid, freq in iteritems(self.dfs)) + self.dfs = {idmap[tokenid]: freq for tokenid, freq in iteritems(self.dfs)} def save_as_text(self, fname, 
sort_by_word=True): """ @@ -405,15 +400,16 @@ def from_corpus(corpus, id2word=None): if id2word is None: # make sure length(result) == get_max_id(corpus) + 1 - result.token2id = dict((unicode(i), i) for i in xrange(max_id + 1)) + result.token2id = {unicode(i): i for i in xrange(max_id + 1)} else: # id=>word mapping given: simply copy it - result.token2id = dict((utils.to_unicode(token), id) for id, token in iteritems(id2word)) - for id in itervalues(result.token2id): + result.token2id = {utils.to_unicode(token): idx for idx, token in iteritems(id2word)} + for idx in itervalues(result.token2id): # make sure all token ids have a valid `dfs` entry - result.dfs[id] = result.dfs.get(id, 0) + result.dfs[idx] = result.dfs.get(idx, 0) logger.info( "built %s from %i documents (total %i corpus positions)", - result, result.num_docs, result.num_pos) + result, result.num_docs, result.num_pos + ) return result diff --git a/gensim/corpora/hashdictionary.py b/gensim/corpora/hashdictionary.py index 63f966b3cd..687ec241ac 100644 --- a/gensim/corpora/hashdictionary.py +++ b/gensim/corpora/hashdictionary.py @@ -101,7 +101,7 @@ def keys(self): return range(len(self)) def __str__(self): - return ("HashDictionary(%i id range)" % len(self)) + return "HashDictionary(%i id range)" % len(self) @staticmethod def from_documents(*args, **kwargs): @@ -117,11 +117,12 @@ def add_documents(self, documents): """ for docno, document in enumerate(documents): if docno % 10000 == 0: - logger.info("adding document #%i to %s" % (docno, self)) + logger.info("adding document #%i to %s", docno, self) self.doc2bow(document, allow_update=True) # ignore the result, here we only care about updating token ids logger.info( "built %s from %i documents (total %i corpus positions)", - self, self.num_docs, self.num_pos) + self, self.num_docs, self.num_pos + ) def doc2bow(self, document, allow_update=False, return_missing=False): """ @@ -182,24 +183,21 @@ def filter_extremes(self, no_below=5, no_above=0.5, keep_n=100000): """ no_above_abs = int(no_above * self.num_docs) # convert fractional threshold to absolute threshold ok = [item for item in iteritems(self.dfs_debug) if no_below <= item[1] <= no_above_abs] - ok = frozenset(word for word, freq in sorted(ok, key=lambda item: -item[1])[:keep_n]) - - self.dfs_debug = dict((word, freq) - for word, freq in iteritems(self.dfs_debug) - if word in ok) - self.token2id = dict((token, tokenid) - for token, tokenid in iteritems(self.token2id) - if token in self.dfs_debug) - self.id2token = dict((tokenid, set(token for token in tokens if token in self.dfs_debug)) - for tokenid, tokens in iteritems(self.id2token)) - self.dfs = dict((tokenid, freq) - for tokenid, freq in iteritems(self.dfs) - if self.id2token.get(tokenid, set())) + ok = frozenset(word for word, freq in sorted(ok, key=lambda x: -x[1])[:keep_n]) + + self.dfs_debug = {word: freq for word, freq in iteritems(self.dfs_debug) if word in ok} + self.token2id = {token: tokenid for token, tokenid in iteritems(self.token2id) if token in self.dfs_debug} + self.id2token = { + tokenid: {token for token in tokens if token in self.dfs_debug} + for tokenid, tokens in iteritems(self.id2token) + } + self.dfs = {tokenid: freq for tokenid, freq in iteritems(self.dfs) if self.id2token.get(tokenid, set())} # for word->document frequency logger.info( "kept statistics for which were in no less than %i and no more than %i (=%.1f%%) documents", - no_below, no_above_abs, 100.0 * no_above) + no_below, no_above_abs, 100.0 * no_above + ) def save_as_text(self, fname): 
""" @@ -216,6 +214,6 @@ def save_as_text(self, fname): words = sorted(self[tokenid]) if words: words_df = [(word, self.dfs_debug.get(word, 0)) for word in words] - words_df = ["%s(%i)" % item for item in sorted(words_df, key=lambda item: -item[1])] + words_df = ["%s(%i)" % item for item in sorted(words_df, key=lambda x: -x[1])] words_df = '\t'.join(words_df) fout.write(utils.to_utf8("%i\t%i\t%s\n" % (tokenid, self.dfs.get(tokenid, 0), words_df))) diff --git a/gensim/corpora/indexedcorpus.py b/gensim/corpora/indexedcorpus.py index 62f29b25ed..af79a2fd5f 100644 --- a/gensim/corpora/indexedcorpus.py +++ b/gensim/corpora/indexedcorpus.py @@ -50,7 +50,7 @@ def __init__(self, fname, index_fname=None): self.index = utils.unpickle(index_fname) # change self.index into a numpy.ndarray to support fancy indexing self.index = numpy.asarray(self.index) - logger.info("loaded corpus index from %s" % index_fname) + logger.info("loaded corpus index from %s", index_fname) except Exception: self.index = None self.length = None @@ -95,15 +95,14 @@ def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progres offsets = serializer.save_corpus(fname, corpus, id2word, metadata=metadata) if offsets is None: - raise NotImplementedError("called serialize on class %s which doesn't support indexing!" % - serializer.__name__) + raise NotImplementedError("called serialize on class %s which doesn't support indexing!" % serializer.__name__) # store offsets persistently, using pickle # we shouldn't have to worry about self.index being a numpy.ndarray as the serializer will return # the offsets that are actually stored on disk - we're not storing self.index in any case, the # load just needs to turn whatever is loaded from disk back into a ndarray - this should also ensure # backwards compatibility - logger.info("saving %s index to %s" % (serializer.__name__, index_fname)) + logger.info("saving %s index to %s", serializer.__name__, index_fname) utils.pickle(offsets, index_fname) def __len__(self): @@ -115,7 +114,7 @@ def __len__(self): return len(self.index) if self.length is None: logger.info("caching corpus length") - self.length = sum(1 for doc in self) + self.length = sum(1 for _ in self) return self.length def __getitem__(self, docno): @@ -128,6 +127,3 @@ def __getitem__(self, docno): return self.docbyoffset(self.index[docno]) else: raise ValueError('Unrecognised value for docno, use either a single integer, a slice or a numpy.ndarray') - - -# endclass IndexedCorpus diff --git a/gensim/corpora/lowcorpus.py b/gensim/corpora/lowcorpus.py index 315490cdcc..d5265f6571 100644 --- a/gensim/corpora/lowcorpus.py +++ b/gensim/corpora/lowcorpus.py @@ -15,7 +15,7 @@ from gensim import utils from gensim.corpora import IndexedCorpus -from six import iteritems, iterkeys +from six import iterkeys from six.moves import xrange, zip as izip @@ -63,7 +63,7 @@ def __init__(self, fname, id2word=None, line2words=split_on_space): simple splitting on spaces. 
""" IndexedCorpus.__init__(self, fname) - logger.info("loading corpus from %s" % fname) + logger.info("loading corpus from %s", fname) self.fname = fname # input file, see class doc for format self.line2words = line2words # how to translate lines into words (simply split on space by default) @@ -79,13 +79,15 @@ def __init__(self, fname, id2word=None, line2words=split_on_space): all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string) else: - logger.info("using provided word mapping (%i ids)" % len(id2word)) + logger.info("using provided word mapping (%i ids)", len(id2word)) self.id2word = id2word self.num_terms = len(self.word2id) self.use_wordids = True # return documents as (wordIndex, wordCount) 2-tuples - logger.info("loaded corpus with %i documents and %i terms from %s" % - (self.num_docs, self.num_terms, fname)) + logger.info( + "loaded corpus with %i documents and %i terms from %s", + self.num_docs, self.num_terms, fname + ) def _calculate_num_docs(self): # the first line in input data is the number of documents (integer). throws exception on bad input. @@ -118,12 +120,11 @@ def line2doc(self, line): use_words.append(word) marker.add(word) # construct a list of (wordIndex, wordFrequency) 2-tuples - doc = list(zip(map(self.word2id.get, use_words), - map(words.count, use_words))) + doc = [(self.word2id.get(w), words.count(w)) for w in use_words] else: uniq_words = set(words) # construct a list of (word, wordFrequency) 2-tuples - doc = list(zip(uniq_words, map(words.count, uniq_words))) + doc = [(w, words.count(w)) for w in uniq_words] # return the document, then forget it and move on to the next one # note that this way, only one doc is stored in memory at a time, not the whole corpus @@ -165,9 +166,10 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): fout.write(utils.to_utf8('%s\n' % ' '.join(words))) if truncated: - logger.warning("List-of-words format can only save vectors with " - "integer elements; %i float entries were truncated to integer value" % - truncated) + logger.warning( + "List-of-words format can only save vectors with integer elements; " + "%i float entries were truncated to integer value", truncated + ) return offsets def docbyoffset(self, offset): @@ -185,6 +187,4 @@ def id2word(self): @id2word.setter def id2word(self, val): self._id2word = val - self.word2id = dict((v, k) for k, v in iteritems(val)) - -# endclass LowCorpus + self.word2id = utils.revdict(val) diff --git a/gensim/corpora/malletcorpus.py b/gensim/corpora/malletcorpus.py index 00333e9358..cacf0074bd 100644 --- a/gensim/corpora/malletcorpus.py +++ b/gensim/corpora/malletcorpus.py @@ -42,7 +42,7 @@ def __init__(self, fname, id2word=None, metadata=False): def _calculate_num_docs(self): with utils.smart_open(self.fname) as fin: - result = sum([1 for x in fin]) + result = sum(1 for _ in fin) return result def __iter__(self): @@ -85,7 +85,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): logger.info("no word id mapping provided; initializing from corpus") id2word = utils.dict_from_corpus(corpus) - logger.info("storing corpus in Mallet format into %s" % fname) + logger.info("storing corpus in Mallet format into %s", fname) truncated = 0 offsets = [] @@ -106,9 +106,10 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): fout.write(utils.to_utf8('%s %s %s\n' % (doc_id, doc_lang, ' '.join(words)))) if truncated: - 
logger.warning("Mallet format can only save vectors with " - "integer elements; %i float entries were truncated to integer value" % - truncated) + logger.warning( + "Mallet format can only save vectors with integer elements; " + "%i float entries were truncated to integer value", truncated + ) return offsets @@ -119,5 +120,3 @@ def docbyoffset(self, offset): with utils.smart_open(self.fname) as f: f.seek(offset) return self.line2doc(f.readline()) - -# endclass MalletCorpus diff --git a/gensim/corpora/mmcorpus.py b/gensim/corpora/mmcorpus.py index 08e809443b..2158f0a526 100644 --- a/gensim/corpora/mmcorpus.py +++ b/gensim/corpora/mmcorpus.py @@ -45,8 +45,8 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False): This function is automatically called by `MmCorpus.serialize`; don't call it directly, call `serialize` instead. """ - logger.info("storing corpus in Matrix Market format to %s" % fname) + logger.info("storing corpus in Matrix Market format to %s", fname) num_terms = len(id2word) if id2word is not None else None - return matutils.MmWriter.write_corpus(fname, corpus, num_terms=num_terms, index=True, progress_cnt=progress_cnt, metadata=metadata) - -# endclass MmCorpus + return matutils.MmWriter.write_corpus( + fname, corpus, num_terms=num_terms, index=True, progress_cnt=progress_cnt, metadata=metadata + ) diff --git a/gensim/corpora/sharded_corpus.py b/gensim/corpora/sharded_corpus.py index 255fc2b7fe..e5904f2773 100644 --- a/gensim/corpora/sharded_corpus.py +++ b/gensim/corpora/sharded_corpus.py @@ -234,8 +234,7 @@ def __init__(self, output_prefix, corpus, dim=None, self.current_offset = None # The index into the dataset which # corresponds to index 0 of current shard - logger.info('Initializing sharded corpus with prefix ' - '{0}'.format(output_prefix)) + logger.info('Initializing sharded corpus with prefix %s', output_prefix) if (not os.path.isfile(output_prefix)) or overwrite: logger.info('Building from corpus...') self.init_shards(output_prefix, corpus, shardsize) @@ -243,8 +242,7 @@ def __init__(self, output_prefix, corpus, dim=None, # Save automatically, to facilitate re-loading # and retain information about how the corpus # was serialized. - logger.info('Saving ShardedCorpus object to ' - '{0}'.format(self.output_prefix)) + logger.info('Saving ShardedCorpus object to %s', self.output_prefix) self.save() else: logger.info('Cloning existing...') @@ -254,19 +252,18 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp """Initialize shards from the corpus.""" if not gensim.utils.is_corpus(corpus): - raise ValueError('Cannot initialize shards without a corpus to read' - ' from! (Got corpus type: {0})'.format(type(corpus))) + raise ValueError( + "Cannot initialize shards without a corpus to read from! (Got corpus type: {0})".format(type(corpus)) + ) proposed_dim = self._guess_n_features(corpus) if proposed_dim != self.dim: if self.dim is None: - logger.info('Deriving dataset dimension from corpus: ' - '{0}'.format(proposed_dim)) + logger.info('Deriving dataset dimension from corpus: %d', proposed_dim) else: logger.warning( - 'Dataset dimension derived from input corpus diffe' - 'rs from initialization argument, using corpus.' - '(corpus {0}, init arg {1})'.format(proposed_dim, self.dim) + "Dataset dimension derived from input corpus differs from initialization argument, " + "using corpus. 
(corpus %d, init arg %d)", proposed_dim, self.dim ) self.dim = proposed_dim @@ -277,11 +274,10 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp logger.info('Running init from corpus.') for n, doc_chunk in enumerate(gensim.utils.grouper(corpus, chunksize=shardsize)): - logger.info('Chunk no. {0} at {1} s'.format(n, time.clock() - start_time)) + logger.info('Chunk no. %d at %f s', n, time.clock() - start_time) current_shard = numpy.zeros((len(doc_chunk), self.dim), dtype=dtype) - logger.debug('Current chunk dimension: ' - '{0} x {1}'.format(len(doc_chunk), self.dim)) + logger.debug('Current chunk dimension: %d x %d', len(doc_chunk), self.dim) for i, doc in enumerate(doc_chunk): doc = dict(doc) @@ -294,7 +290,7 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp self.save_shard(current_shard) end_time = time.clock() - logger.info('Built {0} shards in {1} s.'.format(self.n_shards, end_time - start_time)) + logger.info('Built %d shards in %f s.', self.n_shards, end_time - start_time) def init_by_clone(self): """ @@ -309,12 +305,12 @@ def init_by_clone(self): if temp.dim != self.dim: if self.dim is None: - logger.info('Loaded dataset dimension: {0}'.format(temp.dim)) + logger.info('Loaded dataset dimension: %d', temp.dim) else: logger.warning( - 'Loaded dataset dimension differs from init arg ' - 'dimension, using loaded dim. ' - '(loaded {0}, init {1})'.format(temp.dim, self.dim) + "Loaded dataset dimension differs from init arg dimension, " + "using loaded dim. (loaded %d, init %d)", + temp.dim, self.dim ) self.dim = temp.dim # To be consistent with the loaded data! @@ -346,8 +342,6 @@ def load_shard(self, n): """ Load (unpickle) the n-th shard as the "live" part of the dataset into the Dataset object.""" - # logger.debug('ShardedCorpus loading shard {0}, ' - # 'current shard: {1}'.format(n, self.current_shard_n)) # No-op if the shard is already open. if self.current_shard_n == n: @@ -389,22 +383,12 @@ def shard_by_offset(self, offset): ' supported.'.format(offset)) return k - k = -1 - for i, o in enumerate(self.offsets): - if o > offset: # Condition should fire for every valid offset, - # since the last offset is n_docs (one-past-end). - k = i - 1 # First offset is always 0, so i is at least 1. - break - - return k - def in_current(self, offset): """ Determine whether the given offset falls within the current shard. """ - return (self.current_offset <= offset) \ - and (offset < self.offsets[self.current_shard_n + 1]) + return (self.current_offset <= offset) and (offset < self.offsets[self.current_shard_n + 1]) def in_next(self, offset): """ @@ -416,8 +400,7 @@ def in_next(self, offset): """ if self.current_shard_n == self.n_shards: return False # There's no next shard. - return (self.offsets[self.current_shard_n + 1] <= offset) \ - and (offset < self.offsets[self.current_shard_n + 2]) + return (self.offsets[self.current_shard_n + 1] <= offset) and (offset < self.offsets[self.current_shard_n + 2]) def resize_shards(self, shardsize): """ @@ -472,9 +455,7 @@ def resize_shards(self, shardsize): for old_shard_n, old_shard_name in enumerate(old_shard_names): os.remove(old_shard_name) except Exception as e: - logger.error('Exception occurred during old shard no. {0} ' - 'removal: {1}.\nAttempting to at least move ' - 'new shards in.'.format(old_shard_n, str(e))) + logger.error('Exception occurred during old shard no. 
%d removal: %s.\nAttempting to at least move new shards in.', old_shard_n, str(e)) finally: # If something happens with cleaning up - try to at least get the # new guys in. @@ -483,9 +464,8 @@ def resize_shards(self, shardsize): os.rename(new_shard_name, self._shard_name(shard_n)) # If something happens when we're in this stage, we're screwed. except Exception as e: - print(e) - raise RuntimeError('Resizing completely failed for some reason.' - ' Sorry, dataset is probably ruined...') + logger.exception(e) + raise RuntimeError('Resizing completely failed for some reason. Sorry, dataset is probably ruined...') finally: # Sets the new shard stats. self.n_shards = n_new_shards @@ -529,21 +509,20 @@ def _guess_n_features(self, corpus): return self._guess_n_features(corpus.corpus) else: if not self.dim: - raise TypeError('Couldn\'t find number of features, ' - 'refusing to guess (dimension set to {0},' - 'type of corpus: {1}).'.format(self.dim, type(corpus))) - else: - logger.warning( - 'Couldn\'t find number of features, trusting ' - 'supplied dimension ({0})'.format(self.dim) + raise TypeError( + "Couldn't find number of features, refusing to guess " + "(dimension set to {0}, type of corpus: {1})." + .format(self.dim, type(corpus)) ) + else: + logger.warning("Couldn't find number of features, trusting supplied dimension (%d)", self.dim) n_features = self.dim if self.dim and n_features != self.dim: logger.warning( - 'Discovered inconsistent dataset dim ({0}) and ' - 'feature count from corpus ({1}). Coercing to dimension' - ' given by argument.'.format(self.dim, n_features) + "Discovered inconsistent dataset dim (%d) and feature count from corpus (%d). " + "Coercing to dimension given by argument.", + self.dim, n_features ) return n_features @@ -598,8 +577,7 @@ def __getitem__(self, offset): start = offset.start stop = offset.stop if stop > self.n_docs: - raise IndexError('Requested slice offset {0} out of range' - ' ({1} docs)'.format(stop, self.n_docs)) + raise IndexError('Requested slice offset {0} out of range ({1} docs)'.format(stop, self.n_docs)) # - get range of shards over which to iterate first_shard = self.shard_by_offset(start) @@ -610,16 +588,11 @@ def __getitem__(self, offset): # This fails on one-past # slice indexing; that's why there's a code branch here. - # logger.debug('ShardedCorpus: Retrieving slice {0}: ' - # 'shard {1}'.format((offset.start, offset.stop), - # (first_shard, last_shard))) - self.load_shard(first_shard) # The easy case: both in one shard. if first_shard == last_shard: - s_result = self.current_shard[start - self.current_offset: - stop - self.current_offset] + s_result = self.current_shard[start - self.current_offset: stop - self.current_offset] # Handle different sparsity settings: s_result = self._getitem_format(s_result) @@ -627,11 +600,9 @@ def __getitem__(self, offset): # The hard case: the slice is distributed across multiple shards # - initialize numpy.zeros() - s_result = numpy.zeros((stop - start, self.dim), - dtype=self.current_shard.dtype) + s_result = numpy.zeros((stop - start, self.dim), dtype=self.current_shard.dtype) if self.sparse_serialization: - s_result = sparse.csr_matrix((0, self.dim), - dtype=self.current_shard.dtype) + s_result = sparse.csr_matrix((0, self.dim), dtype=self.current_shard.dtype) # - gradually build it up. 
We will be using three set of start:stop # indexes: @@ -652,13 +623,11 @@ # - if in ending shard, these are from 0 # to (stop - current_offset) shard_start = start - self.current_offset - shard_stop = self.offsets[self.current_shard_n + 1] - \ - self.current_offset + shard_stop = self.offsets[self.current_shard_n + 1] - self.current_offset # s_result[result_start:result_stop] = self.current_shard[ # shard_start:shard_stop] - s_result = self.__add_to_slice(s_result, result_start, result_stop, - shard_start, shard_stop) + s_result = self.__add_to_slice(s_result, result_start, result_stop, shard_start, shard_stop) # First and last get special treatment, these are in between for shard_n in xrange(first_shard + 1, last_shard): @@ -669,9 +638,7 @@ shard_start = 0 shard_stop = self.shardsize - s_result = self.__add_to_slice(s_result, result_start, - result_stop, shard_start, - shard_stop) + s_result = self.__add_to_slice(s_result, result_start, result_stop, shard_start, shard_stop) # Last shard self.load_shard(last_shard) @@ -680,9 +647,7 @@ shard_start = 0 shard_stop = stop - self.current_offset - s_result = self.__add_to_slice(s_result, result_start, result_stop, - shard_start, shard_stop) - + s_result = self.__add_to_slice(s_result, result_start, result_stop, shard_start, shard_stop) s_result = self._getitem_format(s_result) return s_result @@ -707,10 +672,7 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop): Returns the resulting s_result. """ if (result_stop - result_start) != (stop - start): - raise ValueError('Result start/stop range different than stop/start' - 'range (%d - %d vs. %d - %d)'.format(result_start, - result_stop, - start, stop)) + raise ValueError('Result start/stop range different than stop/start range ({0} - {1} vs. {2} - {3})'.format(result_start, result_stop, start, stop)) # Dense data: just copy using numpy's slice notation if not self.sparse_serialization: @@ -722,10 +684,7 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop): # result.
else: if s_result.shape != (result_start, self.dim): - raise ValueError('Assuption about sparse s_result shape ' - 'invalid: {0} expected rows, {1} real ' - 'rows.'.format(result_start, - s_result.shape[0])) + raise ValueError('Assumption about sparse s_result shape invalid: {0} expected rows, {1} real rows.'.format(result_start, s_result.shape[0])) tmp_matrix = self.current_shard[start:stop] s_result = sparse.vstack([s_result, tmp_matrix]) @@ -789,19 +748,12 @@ def save(self, *args, **kwargs): if len(args) == 0: args = tuple([self.output_prefix]) - attrs_to_ignore = ['current_shard', - 'current_shard_n', - 'current_offset'] + attrs_to_ignore = ['current_shard', 'current_shard_n', 'current_offset'] if 'ignore' not in kwargs: kwargs['ignore'] = frozenset(attrs_to_ignore) else: - kwargs['ignore'] = frozenset([v for v in kwargs['ignore']] - + attrs_to_ignore) + kwargs['ignore'] = frozenset([v for v in kwargs['ignore']] + attrs_to_ignore) super(ShardedCorpus, self).save(*args, **kwargs) - # - # self.reset() - # with smart_open(self.output_prefix, 'wb') as pickle_handle: - # cPickle.dump(self, pickle_handle) @classmethod def load(cls, fname, mmap=None): @@ -811,8 +763,7 @@ def load(cls, fname, mmap=None): return super(ShardedCorpus, cls).load(fname, mmap) @staticmethod - def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, - metadata=False, **kwargs): + def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False, **kwargs): """ Implement a serialization interface. Do not call directly; use the `serialize` method instead. @@ -834,9 +785,7 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, ShardedCorpus(fname, corpus, **kwargs) @classmethod - def serialize(serializer, fname, corpus, id2word=None, - index_fname=None, progress_cnt=None, labels=None, - metadata=False, **kwargs): + def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None, labels=None, metadata=False, **kwargs): """ Iterate through the document stream `corpus`, saving the documents as a ShardedCorpus to `fname`. @@ -849,6 +798,4 @@ def serialize(serializer, fname, corpus, id2word=None, Ignore the parameters id2word, index_fname, progress_cnt, labels and metadata. They currently do nothing and are here only to provide a compatible method signature with superclass.""" - serializer.save_corpus(fname, corpus, id2word=id2word, - progress_cnt=progress_cnt, metadata=metadata, - **kwargs) + serializer.save_corpus(fname, corpus, id2word=id2word, progress_cnt=progress_cnt, metadata=metadata, **kwargs) diff --git a/gensim/corpora/svmlightcorpus.py b/gensim/corpora/svmlightcorpus.py index 5e24419421..290414836e 100644 --- a/gensim/corpora/svmlightcorpus.py +++ b/gensim/corpora/svmlightcorpus.py @@ -56,7 +56,7 @@ def __init__(self, fname, store_labels=True): """ IndexedCorpus.__init__(self, fname) - logger.info("loading corpus from %s" % fname) + logger.info("loading corpus from %s", fname) self.fname = fname # input file, see class doc for format self.length = None @@ -89,7 +89,7 @@ def save_corpus(fname, corpus, id2word=None, labels=False, metadata=False): This function is automatically called by `SvmLightCorpus.serialize`; don't call it directly, call `serialize` instead.
""" - logger.info("converting corpus to SVMlight format: %s" % fname) + logger.info("converting corpus to SVMlight format: %s", fname) offsets = [] with utils.smart_open(fname, 'wb') as fout: @@ -129,5 +129,3 @@ def doc2line(doc, label=0): """ pairs = ' '.join("%i:%s" % (termid + 1, termval) for termid, termval in doc) # +1 to convert 0-base to 1-base return "%s %s\n" % (label, pairs) - -# endclass SvmLightCorpus diff --git a/gensim/corpora/textcorpus.py b/gensim/corpora/textcorpus.py index 4be904e613..7265d20d0c 100644 --- a/gensim/corpora/textcorpus.py +++ b/gensim/corpora/textcorpus.py @@ -112,8 +112,7 @@ class TextCorpus(interfaces.CorpusABC): 6. remove stopwords; see `gensim.parsing.preprocessing` for the list of stopwords """ - def __init__(self, input=None, dictionary=None, metadata=False, character_filters=None, - tokenizer=None, token_filters=None): + def __init__(self, input=None, dictionary=None, metadata=False, character_filters=None, tokenizer=None, token_filters=None): """ Args: input (str): path to top-level directory to traverse for corpus documents. @@ -171,9 +170,7 @@ def init_dictionary(self, dictionary): else: logger.info("Input stream provided but dictionary already initialized") else: - logger.warning( - "No input document stream provided; assuming " - "dictionary will be initialized some other way.") + logger.warning("No input document stream provided; assuming dictionary will be initialized some other way.") def __iter__(self): """The function that defines a corpus. @@ -231,8 +228,7 @@ def step_through_preprocess(self, text): yield (self.tokenizer, tokens) for token_filter in self.token_filters: - tokens = token_filter(tokens) - yield (token_filter, tokens) + yield (token_filter, token_filter(tokens)) def get_texts(self): """Iterate over the collection, yielding one document at a time. A document @@ -308,7 +304,6 @@ def __len__(self): # cache the corpus length self.length = sum(1 for _ in self.getstream()) return self.length -# endclass TextCorpus class TextDirectoryCorpus(TextCorpus): @@ -433,7 +428,6 @@ def _cache_corpus_length(self): self.length = sum(1 for _ in self.iter_filepaths()) else: self.length = sum(1 for _ in self.getstream()) -# endclass TextDirectoryCorpus def walk(top, topdown=True, onerror=None, followlinks=False, depth=0): diff --git a/gensim/corpora/ucicorpus.py b/gensim/corpora/ucicorpus.py index 0c09cc7e34..a8911ee07f 100644 --- a/gensim/corpora/ucicorpus.py +++ b/gensim/corpora/ucicorpus.py @@ -21,7 +21,6 @@ from gensim.corpora import IndexedCorpus from gensim.matutils import MmReader from gensim.matutils import MmWriter -from six import iteritems from six.moves import xrange @@ -37,7 +36,7 @@ def __init__(self, input): which is expected to be in the UCI Bag-of-Words format. 
""" - logger.info('Initializing corpus reader from %s' % input) + logger.info('Initializing corpus reader from %s', input) self.input = input @@ -50,16 +49,16 @@ def __init__(self, input): except StopIteration: pass - logger.info('accepted corpus with %i documents, %i features, %i non-zero entries' % - (self.num_docs, self.num_terms, self.num_nnz)) + logger.info( + "accepted corpus with %i documents, %i features, %i non-zero entries", + self.num_docs, self.num_terms, self.num_nnz + ) def skip_headers(self, input_file): for lineno, _ in enumerate(input_file): if lineno == 2: break -# endclass UciReader - class UciWriter(MmWriter): """ @@ -110,7 +109,7 @@ def write_corpus(fname, corpus, progress_cnt=1000, index=False): offsets = [] for docno, bow in enumerate(corpus): if docno % progress_cnt == 0: - logger.info("PROGRESS: saving document #%i" % docno) + logger.info("PROGRESS: saving document #%i", docno) if index: posnow = writer.fout.tell() if posnow == poslast: @@ -125,11 +124,11 @@ def write_corpus(fname, corpus, progress_cnt=1000, index=False): num_docs = docno + 1 if num_docs * num_terms != 0: - logger.info("saved %ix%i matrix, density=%.3f%% (%i/%i)" % - (num_docs, num_terms, - 100.0 * num_nnz / (num_docs * num_terms), - num_nnz, - num_docs * num_terms)) + logger.info( + "saved %ix%i matrix, density=%.3f%% (%i/%i)", + num_docs, num_terms, 100.0 * num_nnz / (num_docs * num_terms), + num_nnz, num_docs * num_terms + ) # now write proper headers, by seeking and overwriting the spaces written earlier writer.update_headers(num_docs, num_terms, num_nnz) @@ -138,8 +137,6 @@ def write_corpus(fname, corpus, progress_cnt=1000, index=False): if index: return offsets -# endclass UciWriter - class UciCorpus(UciReader, IndexedCorpus): """ @@ -179,14 +176,14 @@ def create_dictionary(self): dictionary.dfs = defaultdict(int) dictionary.id2token = self.id2word - dictionary.token2id = dict((v, k) for k, v in iteritems(self.id2word)) + dictionary.token2id = utils.revdict(self.id2word) dictionary.num_docs = self.num_docs dictionary.num_nnz = self.num_nnz for docno, doc in enumerate(self): if docno % 10000 == 0: - logger.info('PROGRESS: processing document %i of %i' % (docno, self.num_docs)) + logger.info('PROGRESS: processing document %i of %i', docno, self.num_docs) for word, count in doc: dictionary.dfs[word] += 1 @@ -214,13 +211,11 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False) # write out vocabulary fname_vocab = utils.smart_extension(fname, '.vocab') - logger.info("saving vocabulary of %i words to %s" % (num_terms, fname_vocab)) + logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab) with utils.smart_open(fname_vocab, 'wb') as fout: for featureid in xrange(num_terms): fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---'))) - logger.info("storing corpus in UCI Bag-of-Words format: %s" % fname) + logger.info("storing corpus in UCI Bag-of-Words format: %s", fname) return UciWriter.write_corpus(fname, corpus, index=True, progress_cnt=progress_cnt) - -# endclass UciCorpus diff --git a/gensim/corpora/wikicorpus.py b/gensim/corpora/wikicorpus.py index ea87cce4a2..4a70f106f3 100755 --- a/gensim/corpora/wikicorpus.py +++ b/gensim/corpora/wikicorpus.py @@ -57,9 +57,11 @@ # MediaWiki namespaces (https://www.mediawiki.org/wiki/Manual:Namespace) that # ought to be ignored -IGNORED_NAMESPACES = ['Wikipedia', 'Category', 'File', 'Portal', 'Template', - 'MediaWiki', 'User', 'Help', 'Book', 'Draft', - 'WikiProject', 'Special', 'Talk'] 
+IGNORED_NAMESPACES = [ + 'Wikipedia', 'Category', 'File', 'Portal', 'Template', + 'MediaWiki', 'User', 'Help', 'Book', 'Draft', 'WikiProject', + 'Special', 'Talk' +] def filter_wiki(raw): @@ -143,10 +145,7 @@ def remove_template(s): prev_c = c # Remove all the templates - s = ''.join([s[end + 1:start] for start, end in - zip(starts + [None], [-1] + ends)]) - - return s + return ''.join([s[end + 1:start] for start, end in zip(starts + [None], [-1] + ends)]) def remove_file(s): @@ -184,8 +183,7 @@ def get_namespace(tag): m = re.match("^{(.*?)}", tag) namespace = m.group(1) if m else "" if not namespace.startswith("http://www.mediawiki.org/xml/export-"): - raise ValueError("%s not recognized as MediaWiki dump namespace" - % namespace) + raise ValueError("%s not recognized as MediaWiki dump namespace" % namespace) return namespace @@ -271,8 +269,7 @@ class WikiCorpus(TextCorpus): """ - def __init__(self, fname, processes=None, lemmatize=utils.has_pattern(), dictionary=None, - filter_namespaces=('0',)): + def __init__(self, fname, processes=None, lemmatize=utils.has_pattern(), dictionary=None, filter_namespaces=('0',)): """ Initialize the corpus. Unless a dictionary is provided, this scans the corpus once, to determine its vocabulary. @@ -311,10 +308,10 @@ def get_texts(self): """ articles, articles_all = 0, 0 positions, positions_all = 0, 0 - texts = \ - ((text, self.lemmatize, title, pageid) - for title, text, pageid - in extract_pages(bz2.BZ2File(self.fname), self.filter_namespaces)) + texts = ( + (text, self.lemmatize, title, pageid) for title, text, pageid + in extract_pages(bz2.BZ2File(self.fname), self.filter_namespaces) + ) pool = multiprocessing.Pool(self.processes, init_to_ignore_interrupt) try: @@ -335,15 +332,16 @@ def get_texts(self): yield tokens except KeyboardInterrupt: logger.warn( - "user terminated iteration over Wikipedia corpus after %i documents with %i positions" - " (total %i articles, %i positions before pruning articles shorter than %i words)", - articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS) + "user terminated iteration over Wikipedia corpus after %i documents with %i positions " + "(total %i articles, %i positions before pruning articles shorter than %i words)", + articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS + ) else: logger.info( - "finished iterating over Wikipedia corpus of %i documents with %i positions" - " (total %i articles, %i positions before pruning articles shorter than %i words)", - articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS) + "finished iterating over Wikipedia corpus of %i documents with %i positions " + "(total %i articles, %i positions before pruning articles shorter than %i words)", + articles, positions, articles_all, positions_all, ARTICLE_MIN_WORDS + ) self.length = articles # cache corpus length finally: pool.terminate() -# endclass WikiCorpus diff --git a/gensim/examples/dmlcz/dmlcorpus.py b/gensim/examples/dmlcz/dmlcorpus.py index d8fb8c4cb5..07fc247f8b 100644 --- a/gensim/examples/dmlcz/dmlcorpus.py +++ b/gensim/examples/dmlcz/dmlcorpus.py @@ -40,9 +40,9 @@ def __init__(self, configId, resultDir, acceptLangs=None): self.sources = {} # all article sources; see sources.DmlSource class for an example of source if acceptLangs is None: # which languages to accept - acceptLangs = set(['any']) # if not specified, accept all languages (including unknown/unspecified) + acceptLangs = {'any'} # if not specified, accept all languages (including unknown/unspecified) self.acceptLangs = 
set(acceptLangs) - logger.info('initialized %s' % self) + logger.info('initialized %s', self) def resultFile(self, fname): return os.path.join(self.resultDir, self.configId + '_' + fname) @@ -105,13 +105,12 @@ def buildDictionary(self): them into tokens and converting tokens to their ids (creating new ids as necessary). """ - logger.info("creating dictionary from %i articles" % len(self.documents)) + logger.info("creating dictionary from %i articles", len(self.documents)) self.dictionary = dictionary.Dictionary() numPositions = 0 for docNo, (sourceId, docUri) in enumerate(self.documents): if docNo % 1000 == 0: - logger.info("PROGRESS: at document #%i/%i (%s, %s)" % - (docNo, len(self.documents), sourceId, docUri)) + logger.info("PROGRESS: at document #%i/%i (%s, %s)", docNo, len(self.documents), sourceId, docUri) source = self.config.sources[sourceId] contents = source.getContent(docUri) words = [source.normalizeWord(word) for word in source.tokenize(contents)] @@ -119,8 +118,7 @@ def buildDictionary(self): # convert to bag-of-words, but ignore the result -- here we only care about updating token ids _ = self.dictionary.doc2bow(words, allowUpdate=True) # noqa:F841 - logger.info("built %s from %i documents (total %i corpus positions)" % - (self.dictionary, len(self.documents), numPositions)) + logger.info("built %s from %i documents (total %i corpus positions)", self.dictionary, len(self.documents), numPositions) def processConfig(self, config, shuffle=False): """ @@ -135,31 +133,29 @@ def processConfig(self, config, shuffle=False): """ self.config = config self.documents = [] - logger.info("processing config %s" % config) + logger.info("processing config %s", config) for sourceId, source in config.sources.iteritems(): - logger.info("processing source '%s'" % sourceId) + logger.info("processing source '%s'", sourceId) accepted = [] for articleUri in source.findArticles(): meta = source.getMeta(articleUri) # retrieve metadata (= dictionary of key->value) if config.acceptArticle(meta): # do additional filtering on articles, based on the article's metadata accepted.append((sourceId, articleUri)) - logger.info("accepted %i articles for source '%s'" % - (len(accepted), sourceId)) + logger.info("accepted %i articles for source '%s'", len(accepted), sourceId) self.documents.extend(accepted) if not self.documents: logger.warning('no articles at all found from the config; something went wrong!') if shuffle: - logger.info("shuffling %i documents for random order" % len(self.documents)) + logger.info("shuffling %i documents for random order", len(self.documents)) import random random.shuffle(self.documents) - logger.info("accepted total of %i articles for %s" % - (len(self.documents), str(config))) + logger.info("accepted total of %i articles for %s", len(self.documents), str(config)) def saveDictionary(self, fname): - logger.info("saving dictionary mapping to %s" % fname) + logger.info("saving dictionary mapping to %s", fname) fout = open(fname, 'w') for tokenId, token in self.dictionary.id2token.iteritems(): fout.write("%i\t%s\n" % (tokenId, token)) @@ -177,7 +173,7 @@ def loadDictionary(fname): return result def saveDocuments(self, fname): - logger.info("saving documents mapping to %s" % fname) + logger.info("saving documents mapping to %s", fname) fout = open(fname, 'w') for docNo, docId in enumerate(self.documents): sourceId, docUri = docId diff --git a/gensim/examples/dmlcz/gensim_build.py b/gensim/examples/dmlcz/gensim_build.py index 9695241fb3..bb62103109 100755 --- 
a/gensim/examples/dmlcz/gensim_build.py +++ b/gensim/examples/dmlcz/gensim_build.py @@ -24,24 +24,20 @@ if AT_HOME: SOURCE_LIST = [ - sources.DmlCzSource('dmlcz', '/Users/kofola/workspace/dml/data/dmlcz/'), - sources.DmlSource('numdam', '/Users/kofola/workspace/dml/data/numdam/'), - sources.ArxmlivSource('arxmliv', '/Users/kofola/workspace/dml/data/arxmliv/'), - ] - -# SOURCE_LIST = [ -# sources.DmlCzSource('dmlcz', '/Users/kofola/workspace/dml/data/dmlcz/CzechMathJ'), -# ] + sources.DmlCzSource('dmlcz', '/Users/kofola/workspace/dml/data/dmlcz/'), + sources.DmlSource('numdam', '/Users/kofola/workspace/dml/data/numdam/'), + sources.ArxmlivSource('arxmliv', '/Users/kofola/workspace/dml/data/arxmliv/'), + ] RESULT_DIR = '/Users/kofola/workspace/dml/data/results' else: SOURCE_LIST = [ - sources.DmlCzSource('dmlcz', '/data/dmlcz/data/share'), - sources.DmlSource('numdam', '/data/dmlcz/data/numdam'), - sources.ArxmlivSource('arxmliv', '/data/dmlcz/data/arxmliv'), - ] + sources.DmlCzSource('dmlcz', '/data/dmlcz/data/share'), + sources.DmlSource('numdam', '/data/dmlcz/data/numdam'), + sources.ArxmlivSource('arxmliv', '/data/dmlcz/data/arxmliv'), + ] RESULT_DIR = '/data/dmlcz/xrehurek/results' @@ -60,7 +56,7 @@ def buildDmlCorpus(config): if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logging.info("running %s" % ' '.join(sys.argv)) + logging.info("running %s", ' '.join(sys.argv)) program = os.path.basename(sys.argv[0]) @@ -76,4 +72,4 @@ def buildDmlCorpus(config): config.addSource(source) buildDmlCorpus(config) - logging.info("finished running %s" % program) + logging.info("finished running %s", program) diff --git a/gensim/examples/dmlcz/gensim_genmodel.py b/gensim/examples/dmlcz/gensim_genmodel.py index df11f9696c..a2f2b792e7 100755 --- a/gensim/examples/dmlcz/gensim_genmodel.py +++ b/gensim/examples/dmlcz/gensim_genmodel.py @@ -31,7 +31,7 @@ if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logging.info("running %s" % ' '.join(sys.argv)) + logging.info("running %s", ' '.join(sys.argv)) program = os.path.basename(sys.argv[0]) @@ -46,9 +46,9 @@ config = dmlcorpus.DmlConfig('%s_%s' % (gensim_build.PREFIX, language), resultDir=gensim_build.RESULT_DIR, acceptLangs=[language]) - logging.info("loading word id mapping from %s" % config.resultFile('wordids.txt')) + logging.info("loading word id mapping from %s", config.resultFile('wordids.txt')) id2word = dmlcorpus.DmlCorpus.loadDictionary(config.resultFile('wordids.txt')) - logging.info("loaded %i word ids" % len(id2word)) + logging.info("loaded %i word ids", len(id2word)) corpus = MmCorpus(config.resultFile('bow.mm')) @@ -56,23 +56,23 @@ model = tfidfmodel.TfidfModel(corpus, id2word=id2word, normalize=True) model.save(config.resultFile('model_tfidf.pkl')) elif method == 'lda': - model = ldamodel.LdaModel(corpus, id2word=id2word, numTopics=DIM_LDA) + model = ldamodel.LdaModel(corpus, id2word=id2word, num_topics=DIM_LDA) model.save(config.resultFile('model_lda.pkl')) elif method == 'lsi': # first, transform word counts to tf-idf weights tfidf = tfidfmodel.TfidfModel(corpus, id2word=id2word, normalize=True) # then find the transformation from tf-idf to latent space - model = lsimodel.LsiModel(tfidf[corpus], id2word=id2word, numTopics=DIM_LSI) + model = lsimodel.LsiModel(tfidf[corpus], id2word=id2word, num_topics=DIM_LSI) model.save(config.resultFile('model_lsi.pkl')) 
elif method == 'rp': # first, transform word counts to tf-idf weights tfidf = tfidfmodel.TfidfModel(corpus, id2word=id2word, normalize=True) # then find the transformation from tf-idf to latent space - model = rpmodel.RpModel(tfidf[corpus], id2word=id2word, numTopics=DIM_RP) + model = rpmodel.RpModel(tfidf[corpus], id2word=id2word, num_topics=DIM_RP) model.save(config.resultFile('model_rp.pkl')) else: raise ValueError('unknown topic extraction method: %s' % repr(method)) MmCorpus.saveCorpus(config.resultFile('%s.mm' % method), model[corpus]) - logging.info("finished running %s" % program) + logging.info("finished running %s", program) diff --git a/gensim/examples/dmlcz/gensim_xml.py b/gensim/examples/dmlcz/gensim_xml.py index f810d045d4..0b8661ac77 100755 --- a/gensim/examples/dmlcz/gensim_xml.py +++ b/gensim/examples/dmlcz/gensim_xml.py @@ -72,20 +72,20 @@ def generateSimilar(corpus, index, method): if SAVE_EMPTY or articles: output = ''.join(articles) # concat all similars to one string if not DRY_RUN: # only open output files for writing if DRY_RUN is false - logging.info("generating %s (%i similars)" % (outfile, len(articles))) + logging.info("generating %s (%i similars)", outfile, len(articles)) outfile = open(outfile, 'w') outfile.write(SIMILAR % output) # add xml headers and print to file outfile.close() else: - logging.info("would be generating %s (%i similars):%s\n" % (outfile, len(articles), output)) + logging.info("would be generating %s (%i similars):%s\n", outfile, len(articles), output) else: - logging.debug("skipping %s (no similar found)" % outfile) + logging.debug("skipping %s (no similar found)", outfile) if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logging.info("running %s" % ' '.join(sys.argv)) + logging.info("running %s", ' '.join(sys.argv)) program = os.path.basename(sys.argv[0]) @@ -100,9 +100,9 @@ def generateSimilar(corpus, index, method): config = dmlcorpus.DmlConfig('%s_%s' % (gensim_build.PREFIX, language), resultDir=gensim_build.RESULT_DIR, acceptLangs=[language]) - logging.info("loading word id mapping from %s" % config.resultFile('wordids.txt')) + logging.info("loading word id mapping from %s", config.resultFile('wordids.txt')) id2word = dmlcorpus.DmlCorpus.loadDictionary(config.resultFile('wordids.txt')) - logging.info("loaded %i word ids" % len(id2word)) + logging.info("loaded %i word ids", len(id2word)) corpus = dmlcorpus.DmlCorpus.load(config.resultFile('.pkl')) input = MmCorpus(config.resultFile('_%s.mm' % method)) @@ -110,11 +110,11 @@ def generateSimilar(corpus, index, method): # initialize structure for similarity queries if method == 'lsi' or method == 'rp': # for these methods, use dense vectors - index = MatrixSimilarity(input, numBest=MAX_SIMILAR + 1, numFeatures=input.numTerms) + index = MatrixSimilarity(input, num_best=MAX_SIMILAR + 1, num_features=input.numTerms) else: - index = SparseMatrixSimilarity(input, numBest=MAX_SIMILAR + 1) + index = SparseMatrixSimilarity(input, num_best=MAX_SIMILAR + 1) index.normalize = False # do not normalize query vectors during similarity queries (the index is already built normalized, so it would be a no-op) generateSimilar(corpus, index, method) # for each document, print MAX_SIMILAR nearest documents to a xml file, in dml-cz specific format - logging.info("finished running %s" % program) + logging.info("finished running %s", program) diff --git a/gensim/examples/dmlcz/runall.sh b/gensim/examples/dmlcz/runall.sh 
index 5d3f5819f2..236c1dce80 100644 --- a/gensim/examples/dmlcz/runall.sh +++ b/gensim/examples/dmlcz/runall.sh @@ -7,7 +7,7 @@ BIN_PATH=~/xrehurek/gensim/dmlcz RESULT_PATH=~/xrehurek/results # set python path, so that python can find and import gensim modules -export PYTHONPATH=~/xrehurek:$PYTHONPATH +export PYTHONPATH=~/xrehurek:${PYTHONPATH} # Language is set to 'any', meaning all articles are processed for similarity in # one go, regardless of their language. @@ -17,18 +17,18 @@ language=any # ========== parse all article sources, build article co-occurence matrix ====== -${BIN_PATH}/gensim_build.py $language 2>&1 | tee ${RESULT_PATH}/gensim_build.log +${BIN_PATH}/gensim_build.py ${language} 2>&1 | tee ${RESULT_PATH}/gensim_build.log # ========== build transformation models ======================================= for method in tfidf rp; do - ( ${BIN_PATH}/gensim_genmodel.py $language $method 2>&1 | tee ${RESULT_PATH}/gensim_genmodel_${method}.log ) & + ( ${BIN_PATH}/gensim_genmodel.py ${language} ${method} 2>&1 | tee ${RESULT_PATH}/gensim_genmodel_${method}.log ) & done wait method=lsi -${BIN_PATH}/gensim_genmodel.py $language $method 2>&1 | tee ${RESULT_PATH}/gensim_genmodel_${method}.log +${BIN_PATH}/gensim_genmodel.py ${language} ${method} 2>&1 | tee ${RESULT_PATH}/gensim_genmodel_${method}.log # =========== generate output xml files ======================================== @@ -36,6 +36,6 @@ ${BIN_PATH}/gensim_genmodel.py $language $method 2>&1 | tee ${RESULT_PATH}/gensi # NOTE if out of memory, move tfidf out of the loop (tfidf uses a lot of memory here) for method in tfidf lsi rp; do - ( ${BIN_PATH}/gensim_xml.py $language $method 2>&1 | tee ${RESULT_PATH}/gensim_xml_${method}.log ) & + ( ${BIN_PATH}/gensim_xml.py ${language} ${method} 2>&1 | tee ${RESULT_PATH}/gensim_xml_${method}.log ) & done wait diff --git a/gensim/examples/dmlcz/sources.py b/gensim/examples/dmlcz/sources.py index da4e0ac0b0..4193da0820 100644 --- a/gensim/examples/dmlcz/sources.py +++ b/gensim/examples/dmlcz/sources.py @@ -110,7 +110,7 @@ def parseDmlMeta(cls, xmlfile): name, cont = name.strip(), cont.strip() if name == 'msc': if len(cont) != 5: - logger.warning('invalid MSC=%s in %s' % (cont, xmlfile)) + logger.warning('invalid MSC=%s in %s', cont, xmlfile) result.setdefault('msc', []).append(cont) continue if name == 'idMR': @@ -132,25 +132,24 @@ def isArticle(self, path): return False # and contain the fulltext.txt file if not os.path.exists(os.path.join(path, 'fulltext.txt')): - logger.info('missing fulltext in %s' % path) + logger.info('missing fulltext in %s', path) return False # and also the meta.xml file if not os.path.exists(os.path.join(path, 'meta.xml')): - logger.info('missing meta.xml in %s' % path) + logger.info('missing meta.xml in %s', path) return False return True def findArticles(self): dirTotal = artAccepted = 0 - logger.info("looking for '%s' articles inside %s" % (self.sourceId, self.baseDir)) + logger.info("looking for '%s' articles inside %s", self.sourceId, self.baseDir) for root, dirs, files in os.walk(self.baseDir): dirTotal += 1 root = os.path.normpath(root) if self.isArticle(root): artAccepted += 1 yield self.idFromDir(root) - logger.info('%i directories processed, found %i articles' % - (dirTotal, artAccepted)) + logger.info('%i directories processed, found %i articles', dirTotal, artAccepted) def getContent(self, uri): """ @@ -200,15 +199,15 @@ def isArticle(self, path): return False # and contain a dspace_id file if not (os.path.exists(os.path.join(path, 'dspace_id'))): - 
logger.info('missing dspace_id in %s' % path) + logger.info('missing dspace_id in %s', path) return False # and contain either fulltext.txt or fulltext_dspace.txt file if not (os.path.exists(os.path.join(path, 'fulltext.txt')) or os.path.exists(os.path.join(path, 'fulltext-dspace.txt'))): - logger.info('missing fulltext in %s' % path) + logger.info('missing fulltext in %s', path) return False # and contain the meta.xml file if not os.path.exists(os.path.join(path, 'meta.xml')): - logger.info('missing meta.xml in %s' % path) + logger.info('missing meta.xml in %s', path) return False return True @@ -278,7 +277,6 @@ class ArxmlivErrorHandler(xml.sax.handler.ErrorHandler): # these errors silently. def error(self, exception): pass -# logger.debug("SAX error parsing xml: %s" % exception) warning = fatalError = error # endclass ArxmlivErrorHandler @@ -302,21 +300,20 @@ def isArticle(self, path): return False # and contain the tex.xml file if not os.path.exists(os.path.join(path, 'tex.xml')): - logger.warning('missing tex.xml in %s' % path) + logger.warning('missing tex.xml in %s', path) return False return True def findArticles(self): dirTotal = artAccepted = 0 - logger.info("looking for '%s' articles inside %s" % (self.sourceId, self.baseDir)) + logger.info("looking for '%s' articles inside %s", self.sourceId, self.baseDir) for root, dirs, files in os.walk(self.baseDir): dirTotal += 1 root = os.path.normpath(root) if self.isArticle(root): artAccepted += 1 yield self.idFromDir(root) - logger.info('%i directories processed, found %i articles' % - (dirTotal, artAccepted)) + logger.info('%i directories processed, found %i articles', dirTotal, artAccepted) def getContent(self, uri): """ diff --git a/gensim/interfaces.py b/gensim/interfaces.py index 4087fd8893..81f85a8527 100644 --- a/gensim/interfaces.py +++ b/gensim/interfaces.py @@ -56,8 +56,7 @@ def __iter__(self): def save(self, *args, **kwargs): import warnings - warnings.warn("corpus.save() stores only the (tiny) iteration object; " - "to serialize the actual corpus content, use e.g. MmCorpus.serialize(corpus)") + warnings.warn("corpus.save() stores only the (tiny) iteration object; to serialize the actual corpus content, use e.g. MmCorpus.serialize(corpus)") super(CorpusABC, self).save(*args, **kwargs) def __len__(self): @@ -95,12 +94,11 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): raise NotImplementedError('cannot instantiate abstract base class') # example code: - logger.info("converting corpus to ??? format: %s" % fname) + logger.info("converting corpus to ??? format: %s", fname) with utils.smart_open(fname, 'wb') as fout: for doc in corpus: # iterate over the document stream fmt = str(doc) # format the document appropriately... fout.write(utils.to_utf8("%s\n" % fmt)) # serialize the formatted document to disk -# endclass CorpusABC class TransformedCorpus(CorpusABC): @@ -127,7 +125,6 @@ def __getitem__(self, docno): return self.obj[self.corpus[docno]] else: raise RuntimeError('Type {} does not support slicing.'.format(type(self.corpus))) -# endclass TransformedCorpus class TransformationABC(utils.SaveLoad): @@ -162,7 +159,6 @@ def _apply(self, corpus, chunksize=None, **kwargs): and return the result as another corpus. """ return TransformedCorpus(self, corpus, chunksize, **kwargs) -# endclass TransformationABC class SimilarityABC(utils.SaveLoad): @@ -263,7 +259,7 @@ def __iter__(self): # (unlike numpy). 
so, clip the end of the chunk explicitly to make # scipy.sparse happy chunk_end = min(self.index.shape[0], chunk_start + self.chunksize) - chunk = self.index[chunk_start : chunk_end] + chunk = self.index[chunk_start: chunk_end] for sim in self[chunk]: yield sim else: @@ -272,4 +268,3 @@ def __iter__(self): # restore old normalization value self.normalize = norm -# endclass SimilarityABC diff --git a/gensim/matutils.py b/gensim/matutils.py index 58904e1657..4b072bfd4a 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -21,26 +21,12 @@ from scipy.stats import entropy import scipy.linalg from scipy.linalg.lapack import get_lapack_funcs +from scipy.linalg.special_matrices import triu from scipy.special import psi # gamma function utils from six import iteritems, itervalues, string_types from six.moves import xrange, zip as izip -# scipy is not a stable package yet, locations change, so try to work -# around differences (currently only concerns location of 'triu' in scipy 0.7 vs. 0.8) -try: - from scipy.linalg.basic import triu -except ImportError: - from scipy.linalg.special_matrices import triu - -try: - from np import triu_indices -except ImportError: - # np < 1.4 - def triu_indices(n, k=0): - m = np.ones((n, n), int) - a = triu(m, k) - return np.where(a != 0) blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0] @@ -101,7 +87,7 @@ def corpus2csc(corpus, num_terms=None, dtype=np.float64, num_docs=None, num_nnz= data = np.empty((num_nnz,), dtype=dtype) for docno, doc in enumerate(corpus): if printprogress and docno % printprogress == 0: - logger.info("PROGRESS: at document #%i/%i" % (docno, num_docs)) + logger.info("PROGRESS: at document #%i/%i", docno, num_docs) posnext = posnow + len(doc) indices[posnow: posnext] = [feature_id for feature_id, _ in doc] data[posnow: posnext] = [feature_weight for _, feature_weight in doc] @@ -114,7 +100,7 @@ def corpus2csc(corpus, num_terms=None, dtype=np.float64, num_docs=None, num_nnz= num_nnz, data, indices, indptr = 0, [], [], [0] for docno, doc in enumerate(corpus): if printprogress and docno % printprogress == 0: - logger.info("PROGRESS: at document #%i" % (docno)) + logger.info("PROGRESS: at document #%i", docno) indices.extend([feature_id for feature_id, _ in doc]) data.extend([feature_weight for _, feature_weight in doc]) num_nnz += len(doc) @@ -150,7 +136,7 @@ def zeros_aligned(shape, dtype, order='C', align=128): nbytes = np.prod(shape, dtype=np.int64) * np.dtype(dtype).itemsize buffer = np.zeros(nbytes + align, dtype=np.uint8) # problematic on win64 ("maximum allowed dimension exceeded") start_index = -buffer.ctypes.data % align - return buffer[start_index : start_index + nbytes].view(dtype).reshape(shape, order=order) + return buffer[start_index: start_index + nbytes].view(dtype).reshape(shape, order=order) def ismatrix(m): @@ -333,7 +319,6 @@ def __iter__(self): def __len__(self): return len(self.dense) -# endclass DenseCorpus class Sparse2Corpus(object): @@ -356,7 +341,6 @@ def __iter__(self): def __len__(self): return self.sparse.shape[1] -# endclass Sparse2Corpus def veclen(vec): @@ -381,7 +365,7 @@ def ret_log_normalize_vec(vec, axis=1): log_shift = log_max - np.log(len(vec) + 1.0) - max_val tot = np.sum(np.exp(vec + log_shift)) log_norm = np.log(tot) - log_shift - vec = vec - log_norm + vec -= log_norm else: if axis == 1: # independently normalize each sample max_val = np.max(vec, 1) @@ -391,10 +375,10 @@ def ret_log_normalize_vec(vec, axis=1): vec = vec - log_norm[:, np.newaxis] elif axis == 0: # 
normalize each feature k = ret_log_normalize_vec(vec.T) - return (k[0].T, k[1]) + return k[0].T, k[1] else: raise ValueError("'%s' is not a supported axis" % axis) - return (vec, log_norm) + return vec, log_norm blas_nrm2 = blas('nrm2', np.array([], dtype=float)) @@ -435,7 +419,7 @@ def unitvec(vec, norm='l2'): try: first = next(iter(vec)) # is there at least one element? - except Exception: + except StopIteration: return vec if isinstance(first, (tuple, list)) and len(first) == 2: # gensim sparse format @@ -476,10 +460,10 @@ def isbow(vec): vec = vec.todense().tolist() try: id_, val_ = vec[0] # checking first value to see if it is in bag of words format by unpacking - id_, val_ = int(id_), float(val_) + int(id_), float(val_) except IndexError: return True # this is to handle the empty input case - except Exception: + except (ValueError, TypeError): return False return True @@ -610,7 +594,7 @@ def dirichlet_expectation(alpha): For a vector `theta~Dir(alpha)`, compute `E[log(theta)]`. """ - if (len(alpha.shape) == 1): + if len(alpha.shape) == 1: result = psi(alpha) - psi(np.sum(alpha)) else: result = psi(alpha) - psi(np.sum(alpha, 1))[:, np.newaxis] @@ -628,7 +612,7 @@ def qr_destroy(la): del la[0], la # now `a` is the only reference to the input matrix m, n = a.shape # perform q, r = QR(a); code hacked out of scipy.linalg.qr - logger.debug("computing QR of %s dense matrix" % str(a.shape)) + logger.debug("computing QR of %s dense matrix", str(a.shape)) geqrf, = get_lapack_funcs(('geqrf',), (a,)) qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True) qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True) @@ -675,12 +659,10 @@ def write_headers(self, num_docs, num_terms, num_nnz): if num_nnz < 0: # we don't know the matrix shape/density yet, so only log a general line - logger.info("saving sparse matrix to %s" % self.fname) + logger.info("saving sparse matrix to %s", self.fname) self.fout.write(utils.to_utf8(' ' * 50 + '\n')) # 48 digits must be enough for everybody else: - logger.info( - "saving sparse %sx%s matrix with %i non-zero entries to %s", - num_docs, num_terms, num_nnz, self.fname) + logger.info("saving sparse %sx%s matrix with %i non-zero entries to %s", num_docs, num_terms, num_nnz, self.fname) self.fout.write(utils.to_utf8('%s %s %s\n' % (num_docs, num_terms, num_nnz))) self.last_docno = -1 self.headers_written = True @@ -737,7 +719,7 @@ def write_corpus(fname, corpus, progress_cnt=1000, index=False, num_terms=None, else: bow = doc if docno % progress_cnt == 0: - logger.info("PROGRESS: saving document #%i" % docno) + logger.info("PROGRESS: saving document #%i", docno) if index: posnow = mw.fout.tell() if posnow == poslast: @@ -755,11 +737,7 @@ def write_corpus(fname, corpus, progress_cnt=1000, index=False, num_terms=None, num_terms = num_terms or _num_terms if num_docs * num_terms != 0: - logger.info("saved %ix%i matrix, density=%.3f%% (%i/%i)" % ( - num_docs, num_terms, - 100.0 * num_nnz / (num_docs * num_terms), - num_nnz, - num_docs * num_terms)) + logger.info("saved %ix%i matrix, density=%.3f%% (%i/%i)", num_docs, num_terms, 100.0 * num_nnz / (num_docs * num_terms), num_nnz, num_docs * num_terms) # now write proper headers, by seeking and overwriting the spaces written earlier mw.fake_headers(num_docs, num_terms, num_nnz) @@ -779,10 +757,9 @@ def __del__(self): self.close() # does nothing if called twice (on an already closed file), so no worries def close(self): - logger.debug("closing %s" % self.fname) + logger.debug("closing %s", self.fname) if hasattr(self, 
'fout'): self.fout.close() -# endclass MmWriter class MmReader(object): @@ -806,7 +783,7 @@ def __init__(self, input, transposed=True): `input` is either a string (file path) or a file-like object that supports `seek()` (e.g. gzip.GzipFile, bz2.BZ2File). """ - logger.info("initializing corpus reader from %s" % input) + logger.info("initializing corpus reader from %s", input) self.input, self.transposed = input, transposed with utils.file_or_filename(self.input) as lines: try: @@ -821,14 +798,12 @@ def __init__(self, input, transposed=True): for lineno, line in enumerate(lines): line = utils.to_unicode(line) if not line.startswith('%'): - self.num_docs, self.num_terms, self.num_nnz = map(int, line.split()) + self.num_docs, self.num_terms, self.num_nnz = (int(x) for x in line.split()) if not self.transposed: self.num_docs, self.num_terms = self.num_terms, self.num_docs break - logger.info( - "accepted corpus with %i documents, %i features, %i non-zero entries", - self.num_docs, self.num_terms, self.num_nnz) + logger.info("accepted corpus with %i documents, %i features, %i non-zero entries", self.num_docs, self.num_terms, self.num_nnz) def __len__(self): return self.num_docs @@ -917,4 +892,3 @@ def docbyoffset(self, offset): document.append((termid, val,)) # add another field to the current document return document -# endclass MmReader diff --git a/gensim/models/__init__.py b/gensim/models/__init__.py index 5c25a86fd5..530f7c4980 100644 --- a/gensim/models/__init__.py +++ b/gensim/models/__init__.py @@ -36,7 +36,7 @@ class VocabTransform(interfaces.TransformationABC): Old features that have no counterpart in the new ids are discarded. This can be used to filter vocabulary of a corpus "online":: - >>> old2new = dict((oldid, newid) for newid, oldid in enumerate(ids_you_want_to_keep)) + >>> old2new = {oldid: newid for newid, oldid in enumerate(ids_you_want_to_keep)} >>> vt = VocabTransform(old2new) >>> for vec_with_new_ids in vt[corpus_with_old_ids]: >>> ... @@ -44,7 +44,6 @@ class VocabTransform(interfaces.TransformationABC): """ def __init__(self, old2new, id2token=None): - # id2word = dict((newid, oldid2word[oldid]) for oldid, newid in old2new.iteritems()) self.old2new = old2new self.id2token = id2token @@ -58,4 +57,3 @@ def __getitem__(self, bow): return self._apply(bow) return sorted((self.old2new[oldid], weight) for oldid, weight in bow if oldid in self.old2new) -# endclass VocabTransform diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py index 7e1ee02d62..bcc63e8f8c 100755 --- a/gensim/models/atmodel.py +++ b/gensim/models/atmodel.py @@ -84,7 +84,7 @@ def construct_doc2author(corpus, author2doc): return doc2author -def construct_author2doc(corpus, doc2author): +def construct_author2doc(doc2author): """Make a mapping from author IDs to document IDs.""" # First get a set of all authors. 
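Note on the construct_author2doc signature change above: the author-to-document mapping can be derived from doc2author alone, which is why the unused corpus argument is dropped. A minimal sketch of such an inversion, assuming the same dict-of-lists shapes used in atmodel.py (illustrative only, not the library's implementation):

def invert_doc2author(doc2author):
    # doc2author: {doc_id: [author, ...]}  ->  author2doc: {author: [doc_id, ...]}
    author2doc = {}
    for doc_id, authors in doc2author.items():
        for author in authors:
            author2doc.setdefault(author, []).append(doc_id)
    return author2doc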
@@ -118,10 +118,10 @@ class AuthorTopicModel(LdaModel): """ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, doc2author=None, - chunksize=2000, passes=1, iterations=50, decay=0.5, offset=1.0, - alpha='symmetric', eta='symmetric', update_every=1, eval_every=10, - gamma_threshold=0.001, serialized=False, serialization_path=None, - minimum_probability=0.01, random_state=None): + chunksize=2000, passes=1, iterations=50, decay=0.5, offset=1.0, + alpha='symmetric', eta='symmetric', update_every=1, eval_every=10, + gamma_threshold=0.001, serialized=False, serialization_path=None, + minimum_probability=0.01, random_state=None): """ If the iterable corpus and one of author2doc/doc2author dictionaries are given, start training straight away. If not given, the model is left untrained @@ -212,7 +212,9 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, d self.id2word = id2word if corpus is None and self.id2word is None: - raise ValueError('at least one of corpus/id2word must be specified, to establish input space dimensionality') + raise ValueError( + "at least one of corpus/id2word must be specified, to establish input space dimensionality" + ) if self.id2word is None: logger.warning("no word id mapping provided; initializing from corpus, assuming identity") @@ -252,7 +254,9 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, d if serialized and not serialization_path: raise ValueError("If serialized corpora are used, a the path to a folder where the corpus should be saved must be provided (serialized_path).") if serialized and serialization_path: - assert not isfile(serialization_path), "A file already exists at the serialization_path path; choose a different serialization_path, or delete the file." + assert not isfile(serialization_path), \ + "A file already exists at the serialization_path path; " \ + "choose a different serialization_path, or delete the file." self.serialization_path = serialization_path # Initialize an empty self.corpus. @@ -260,7 +264,8 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, d self.alpha, self.optimize_alpha = self.init_dir_prior(alpha, 'alpha') - assert self.alpha.shape == (self.num_topics,), "Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics) + assert self.alpha.shape == (self.num_topics,), \ + "Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics) if isinstance(eta, six.string_types): if eta == 'asymmetric': @@ -272,7 +277,8 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, d assert (self.eta.shape == (self.num_terms,) or self.eta.shape == (self.num_topics, self.num_terms)), ( "Invalid eta shape. Got shape %s, but expected (%d, 1) or (%d, %d)" % - (str(self.eta.shape), self.num_terms, self.num_topics, self.num_terms)) + (str(self.eta.shape), self.num_terms, self.num_topics, self.num_terms) + ) # VB constants self.iterations = iterations @@ -300,7 +306,7 @@ def init_empty_corpus(self): """ if self.serialized: - # Tnitialize the corpus as a serialized empty list. + # Initialize the corpus as a serialized empty list. # This corpus will be extended in self.update. MmCorpus.serialize(self.serialization_path, []) # Serialize empty corpus. self.corpus = MmCorpus(self.serialization_path) # Store serialized corpus object in self.corpus. 
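The init_empty_corpus hunk above also shows the serialize-then-reload pattern the author-topic model relies on when serialized=True. A hedged, stand-alone sketch of that pattern (the path below is a placeholder, not taken from the patch):

from gensim.corpora import MmCorpus

serialization_path = '/tmp/author_topic_corpus.mm'  # placeholder location, not from the patch
MmCorpus.serialize(serialization_path, [])          # persist an empty corpus in Matrix Market format
corpus = MmCorpus(serialization_path)               # reload it as a streamed, indexable corpus

The model then grows this on-disk corpus as new documents arrive (see the extend_corpus hunk that follows).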
@@ -333,9 +339,8 @@ def extend_corpus(self, corpus): assert isinstance(corpus, list), "If serialized == False, all input corpora must be lists." self.corpus.extend(corpus) - def compute_phinorm(self, ids, authors_d, expElogthetad, expElogbetad): + def compute_phinorm(self, expElogthetad, expElogbetad): """Efficiently computes the normalizing factor in phi.""" - phinorm = np.zeros(len(ids)) expElogtheta_sum = expElogthetad.sum(axis=0) phinorm = expElogtheta_sum.dot(expElogbetad) + 1e-100 @@ -362,8 +367,8 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c """ try: - _ = len(chunk) # noqa:F841 - except Exception: + len(chunk) + except TypeError: # convert iterators/generators to plain list, so we have len() etc. chunk = list(chunk) if len(chunk) > 1: @@ -389,9 +394,9 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c # TODO: this is duplication of code in LdaModel. Refactor. if doc and not isinstance(doc[0][0], six.integer_types + (np.integer,)): # make sure the term IDs are ints, otherwise np will get upset - ids = [int(id) for id, _ in doc] + ids = [int(idx) for idx, _ in doc] else: - ids = [id for id, _ in doc] + ids = [idx for idx, _ in doc] cts = np.array([cnt for _, cnt in doc]) # Get all authors in current document, and convert the author names to integer IDs. @@ -406,11 +411,10 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c expElogbetad = self.expElogbeta[:, ids] # Compute the normalizing constant of phi for the current document. - phinorm = self.compute_phinorm(ids, authors_d, expElogthetad, expElogbetad) + phinorm = self.compute_phinorm(expElogthetad, expElogbetad) # Iterate between gamma and phi until convergence - for iteration in xrange(self.iterations): - + for _ in xrange(self.iterations): lastgamma = tilde_gamma.copy() # Update gamma. @@ -428,7 +432,7 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c expElogthetad = np.exp(Elogthetad) # Update the normalizing constant in phi. - phinorm = self.compute_phinorm(ids, authors_d, expElogthetad, expElogbetad) + phinorm = self.compute_phinorm(expElogthetad, expElogbetad) # Check for convergence. # Criterion is mean change in "local" gamma. @@ -452,8 +456,10 @@ def inference(self, chunk, author2doc, doc2author, rhot, collect_sstats=False, c sstats[:, ids] += np.outer(expElogtheta_sum_a.T, cts / phinorm) if len(chunk) > 1: - logger.debug("%i/%i documents converged within %i iterations", - converged, len(chunk), self.iterations) + logger.debug( + "%i/%i documents converged within %i iterations", + converged, len(chunk), self.iterations + ) if collect_sstats: # This step finishes computing the sufficient statistics for the @@ -473,7 +479,10 @@ def do_estep(self, chunk, author2doc, doc2author, rhot, state=None, chunk_doc_id # TODO: this method is somewhat similar to the one in LdaModel. Refactor if possible. 
if state is None: state = self.state - gamma, sstats = self.inference(chunk, author2doc, doc2author, rhot, collect_sstats=True, chunk_doc_idx=chunk_doc_idx) + gamma, sstats = self.inference( + chunk, author2doc, doc2author, rhot, + collect_sstats=True, chunk_doc_idx=chunk_doc_idx + ) state.sstats += sstats state.numdocs += len(chunk) return gamma @@ -492,13 +501,14 @@ def log_perplexity(self, chunk, chunk_doc_idx=None, total_docs=None): corpus_words = sum(cnt for document in chunk for _, cnt in document) subsample_ratio = 1.0 * total_docs / len(chunk) perwordbound = self.bound(chunk, chunk_doc_idx, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words) - logger.info("%.3f per-word bound, %.1f perplexity estimate based on a corpus of %i documents with %i words" % - (perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words)) + logger.info( + "%.3f per-word bound, %.1f perplexity estimate based on a corpus of %i documents with %i words", + perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words + ) return perwordbound - def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None, decay=None, offset=None, - passes=None, update_every=None, eval_every=None, iterations=None, - gamma_threshold=None, chunks_as_numpy=False): + def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None, decay=None, offset=None, passes=None, + update_every=None, eval_every=None, iterations=None, gamma_threshold=None, chunks_as_numpy=False): """ Train the model with new documents, by EM-iterating over `corpus` until the topics converge (or until the maximum number of allowed iterations @@ -590,14 +600,14 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None, if doc2author is None: doc2author = construct_doc2author(corpus, author2doc) elif author2doc is None: - author2doc = construct_author2doc(corpus, doc2author) + author2doc = construct_author2doc(doc2author) # Number of authors that need to be updated. num_input_authors = len(author2doc) try: len_input_corpus = len(corpus) - except Exception: + except TypeError: logger.warning("input corpus stream has no len(); counting documents") len_input_corpus = sum(1 for _ in corpus) if len_input_corpus == 0: @@ -650,7 +660,7 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None, # Train on all documents of authors in input_corpus. train_corpus_idx = [] - for a in author2doc.keys(): # For all authors in input corpus. + for _ in author2doc.keys(): # For all authors in input corpus. for doc_ids in self.author2doc.values(): # For all documents in total corpus. 
train_corpus_idx.extend(doc_ids) @@ -674,17 +684,15 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None, evalafter = min(lencorpus, (eval_every or 0) * self.numworkers * chunksize) updates_per_pass = max(1, lencorpus / updateafter) - logger.info("running %s author-topic training, %s topics, %s authors, %i passes over " - "the supplied corpus of %i documents, updating model once " - "every %i documents, evaluating perplexity every %i documents, " - "iterating %ix with a convergence threshold of %f", - updatetype, self.num_topics, num_input_authors, passes, lencorpus, - updateafter, evalafter, iterations, - gamma_threshold) + logger.info( + "running %s author-topic training, %s topics, %s authors, %i passes over the supplied corpus of %i documents, updating model once " + "every %i documents, evaluating perplexity every %i documents, iterating %ix with a convergence threshold of %f", + updatetype, self.num_topics, num_input_authors, passes, lencorpus, updateafter, + evalafter, iterations, gamma_threshold + ) if updates_per_pass * passes < 10: - logger.warning("too few updates, training might not converge; consider " - "increasing the number of passes or iterations to improve accuracy") + logger.warning("too few updates, training might not converge; consider increasing the number of passes or iterations to improve accuracy") # rho is the "speed" of updating; TODO try other fncs # pass_ + num_updates handles increasing the starting t for each pass, @@ -694,7 +702,7 @@ def rho(): for pass_ in xrange(passes): if self.dispatcher: - logger.info('initializing %s workers' % self.numworkers) + logger.info('initializing %s workers', self.numworkers) self.dispatcher.reset(self.state) else: # gamma is not needed in "other", thus its shape is (0, 0). @@ -713,13 +721,17 @@ def rho(): if self.dispatcher: # add the chunk to dispatcher's job queue, so workers can munch on it - logger.info('PROGRESS: pass %i, dispatching documents up to #%i/%i', - pass_, chunk_no * chunksize + len(chunk), lencorpus) + logger.info( + "PROGRESS: pass %i, dispatching documents up to #%i/%i", + pass_, chunk_no * chunksize + len(chunk), lencorpus + ) # this will eventually block until some jobs finish, because the queue has a small finite length self.dispatcher.putjob(chunk) else: - logger.info('PROGRESS: pass %i, at document #%i/%i', - pass_, chunk_no * chunksize + len(chunk), lencorpus) + logger.info( + "PROGRESS: pass %i, at document #%i/%i", + pass_, chunk_no * chunksize + len(chunk), lencorpus + ) # do_estep requires the indexes of the documents being trained on, to know what authors # correspond to the documents. gammat = self.do_estep(chunk, self.author2doc, self.doc2author, rho(), other, chunk_doc_idx) @@ -757,8 +769,6 @@ def rho(): other = self.dispatcher.getstate() self.do_mstep(rho(), other, pass_ > 0) del other - dirty = False - # endfor entire corpus update def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, doc2author=None): """ @@ -833,11 +843,11 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, # Computing the bound requires summing over expElogtheta[a, k] * expElogbeta[k, v], which # is the same computation as in normalizing phi. 
- phinorm = self.compute_phinorm(ids, authors_d, expElogtheta[authors_d, :], expElogbeta[:, ids]) + phinorm = self.compute_phinorm(expElogtheta[authors_d, :], expElogbeta[:, ids]) word_score += np.log(1.0 / len(authors_d)) * sum(cts) + cts.dot(np.log(phinorm)) # Compensate likelihood for when `chunk` above is only a sample of the whole corpus. This ensures - # that the likelihood is always rougly on the same scale. + # that the likelihood is always roughly on the same scale. word_score *= subsample_ratio # E[log p(theta | alpha) - log q(theta | gamma)] @@ -863,12 +873,12 @@ def bound(self, chunk, chunk_doc_idx=None, subsample_ratio=1.0, author2doc=None, return total_score def get_document_topics(self, word_id, minimum_probability=None): - ''' + """ This method overwrites `LdaModel.get_document_topics` and simply raises an exception. `get_document_topics` is not valid for the author-topic model, use `get_author_topics` instead. - ''' + """ raise NotImplementedError('Method "get_document_topics" is not valid for the author-topic model. Use the "get_author_topics" method.') @@ -891,13 +901,12 @@ def get_author_topics(self, author_name, minimum_probability=None): topic_dist = self.state.gamma[author_id, :] / sum(self.state.gamma[author_id, :]) - author_topics = [(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist) - if topicvalue >= minimum_probability] + author_topics = [(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist) if topicvalue >= minimum_probability] return author_topics def __getitem__(self, author_names, eps=None): - ''' + """ Return topic distribution for input author as a list of (topic_id, topic_probabiity) 2-tuples. @@ -905,7 +914,7 @@ def __getitem__(self, author_names, eps=None): Do not call this method directly, instead use `model[author_names]`. - ''' + """ if isinstance(author_names, list): items = [] for a in author_names: @@ -914,4 +923,3 @@ def __getitem__(self, author_names, eps=None): items = self.get_author_topics(author_names, minimum_probability=eps) return items -# endclass AuthorTopicModel diff --git a/gensim/models/callbacks.py b/gensim/models/callbacks.py index d4598cca1d..3ef7fbe978 100644 --- a/gensim/models/callbacks.py +++ b/gensim/models/callbacks.py @@ -45,7 +45,8 @@ class CoherenceMetric(Metric): """ Metric class for coherence evaluation """ - def __init__(self, corpus=None, texts=None, dictionary=None, coherence=None, window_size=None, topn=10, logger=None, viz_env=None, title=None): + def __init__(self, corpus=None, texts=None, dictionary=None, coherence=None, + window_size=None, topn=10, logger=None, viz_env=None, title=None): """ Args: corpus : Gensim document corpus. 
@@ -108,7 +109,10 @@ def get_value(self, **kwargs): self.model = None self.topics = None super(CoherenceMetric, self).set_parameters(**kwargs) - cm = gensim.models.CoherenceModel(self.model, self.topics, self.texts, self.corpus, self.dictionary, self.window_size, self.coherence, self.topn) + cm = gensim.models.CoherenceModel( + self.model, self.topics, self.texts, self.corpus, self.dictionary, + self.window_size, self.coherence, self.topn + ) return cm.get_coherence() @@ -146,7 +150,8 @@ class DiffMetric(Metric): """ Metric class for topic difference evaluation """ - def __init__(self, distance="jaccard", num_words=100, n_ann_terms=10, diagonal=True, annotation=False, normed=True, logger=None, viz_env=None, title=None): + def __init__(self, distance="jaccard", num_words=100, n_ann_terms=10, diagonal=True, + annotation=False, normed=True, logger=None, viz_env=None, title=None): """ Args: distance : measure used to calculate difference between any topic pair. Available values: @@ -181,7 +186,10 @@ def get_value(self, **kwargs): other_model : second topic model instance to calculate the difference from """ super(DiffMetric, self).set_parameters(**kwargs) - diff_diagonal, _ = self.model.diff(self.other_model, self.distance, self.num_words, self.n_ann_terms, self.diagonal, self.annotation, self.normed) + diff_diagonal, _ = self.model.diff( + self.other_model, self.distance, self.num_words, self.n_ann_terms, + self.diagonal, self.annotation, self.normed + ) return diff_diagonal @@ -189,7 +197,8 @@ class ConvergenceMetric(Metric): """ Metric class for convergence evaluation """ - def __init__(self, distance="jaccard", num_words=100, n_ann_terms=10, diagonal=True, annotation=False, normed=True, logger=None, viz_env=None, title=None): + def __init__(self, distance="jaccard", num_words=100, n_ann_terms=10, diagonal=True, + annotation=False, normed=True, logger=None, viz_env=None, title=None): """ Args: distance : measure used to calculate difference between any topic pair. 
Available values: @@ -224,7 +233,10 @@ def get_value(self, **kwargs): other_model : second topic model instance to calculate the difference from """ super(ConvergenceMetric, self).set_parameters(**kwargs) - diff_diagonal, _ = self.model.diff(self.other_model, self.distance, self.num_words, self.n_ann_terms, self.diagonal, self.annotation, self.normed) + diff_diagonal, _ = self.model.diff( + self.other_model, self.distance, self.num_words, self.n_ann_terms, + self.diagonal, self.annotation, self.normed + ) return np.sum(diff_diagonal) @@ -287,23 +299,33 @@ def on_epoch_end(self, epoch, topics=None): if epoch == 0: if value.ndim > 0: diff_mat = np.array([value]) - viz_metric = self.viz.heatmap(X=diff_mat.T, env=metric.viz_env, opts=dict(xlabel='Epochs', ylabel=label, title=label)) + viz_metric = self.viz.heatmap( + X=diff_mat.T, env=metric.viz_env, opts=dict(xlabel='Epochs', ylabel=label, title=label) + ) # store current epoch's diff diagonal self.diff_mat.put(diff_mat) # saving initial plot window self.windows.append(copy.deepcopy(viz_metric)) else: - viz_metric = self.viz.line(Y=np.array([value]), X=np.array([epoch]), env=metric.viz_env, opts=dict(xlabel='Epochs', ylabel=label, title=label)) + viz_metric = self.viz.line( + Y=np.array([value]), X=np.array([epoch]), env=metric.viz_env, + opts=dict(xlabel='Epochs', ylabel=label, title=label) + ) # saving initial plot window self.windows.append(copy.deepcopy(viz_metric)) else: if value.ndim > 0: # concatenate with previous epoch's diff diagonals diff_mat = np.concatenate((self.diff_mat.get(), np.array([value]))) - self.viz.heatmap(X=diff_mat.T, env=metric.viz_env, win=self.windows[i], opts=dict(xlabel='Epochs', ylabel=label, title=label)) + self.viz.heatmap( + X=diff_mat.T, env=metric.viz_env, win=self.windows[i], + opts=dict(xlabel='Epochs', ylabel=label, title=label) + ) self.diff_mat.put(diff_mat) else: - self.viz.updateTrace(Y=np.array([value]), X=np.array([epoch]), env=metric.viz_env, win=self.windows[i]) + self.viz.updateTrace( + Y=np.array([value]), X=np.array([epoch]), env=metric.viz_env, win=self.windows[i] + ) if metric.logger == "shell": statement = "".join(("Epoch ", str(epoch), ": ", label, " estimate: ", str(value))) diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py index 4fa80ea15e..f6781903a8 100644 --- a/gensim/models/coherencemodel.py +++ b/gensim/models/coherencemodel.py @@ -168,7 +168,8 @@ def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary= if isinstance(model.id2word, FakeDict): raise ValueError( "The associated dictionary should be provided with the corpus or 'id2word'" - " for topic model should be set as the associated dictionary.") + " for topic model should be set as the associated dictionary." + ) else: self.dictionary = model.id2word else: @@ -236,9 +237,7 @@ def topics(self, topics): if self.model is not None: new_topics = self._get_topics() if topics is not None: - logger.warning( - "Ignoring topics you are attempting to set in favor of model's topics: %s", - self.model) + logger.warning("Ignoring topics you are attempting to set in favor of model's topics: %s", self.model) elif topics is not None: new_topics = [] for topic in topics: @@ -275,7 +274,8 @@ def _get_topics(self): except AttributeError: raise ValueError( "This topic model is not currently supported. Supported topic models" - " should implement the `get_topics` method.") + " should implement the `get_topics` method." 
+ ) def segment_topics(self): return self.measure.seg(self.topics) @@ -294,7 +294,8 @@ def estimate_probabilities(self, segmented_topics=None): self._accumulator = self.measure.prob( texts=self.texts, segmented_topics=segmented_topics, dictionary=self.dictionary, window_size=self.window_size, - processes=self.processes) + processes=self.processes + ) return self._accumulator diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 5e0dc20418..04bf923c65 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -70,9 +70,9 @@ try: from gensim.models.doc2vec_inner import train_document_dbow, train_document_dm, train_document_dm_concat from gensim.models.word2vec_inner import FAST_VERSION # blas-adaptation shared from word2vec - logger.debug('Fast version of {0} is being used'.format(__name__)) + logger.debug('Fast version of %s is being used', __name__) except ImportError: - logger.warning('Slow version of {0} is being used'.format(__name__)) + logger.warning('Slow version of %s is being used', __name__) # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 @@ -109,9 +109,10 @@ def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, train_batch_sg(model, [doc_words], alpha, work) for doctag_index in doctag_indexes: for word in doc_words: - train_sg_pair(model, word, doctag_index, alpha, learn_vectors=learn_doctags, - learn_hidden=learn_hidden, context_vectors=doctag_vectors, - context_locks=doctag_locks) + train_sg_pair( + model, word, doctag_index, alpha, learn_vectors=learn_doctags, learn_hidden=learn_hidden, + context_vectors=doctag_vectors, context_locks=doctag_locks + ) return len(doc_words) @@ -173,9 +174,9 @@ def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=N return len(word_vocabs) - def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, - learn_doctags=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): + def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, + learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, + doctag_vectors=None, doctag_locks=None): """ Update distributed memory model ("PV-DM") by training on a single document, using a concatenation of the context window word vectors (rather than a sum or average). 
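Most hunks in this patch, including the 'Fast version of %s' / 'Slow version of %s' lines earlier in the doc2vec.py section, swap eager string interpolation for logging's lazy %-style arguments. A small self-contained illustration of why the lazy form is preferred (logger name and values are arbitrary examples, not from gensim):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger('example')

value = list(range(5))
logger.info('processed %s' % value)   # eager: the message is formatted even though INFO is disabled
logger.info('processed %s', value)    # lazy: formatting is deferred and skipped when INFO is disabled

Besides avoiding wasted work for suppressed levels, the lazy form keeps the raw template and its arguments available to handlers separately.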
@@ -382,10 +383,8 @@ def estimated_lookup_memory(self): def reset_weights(self, model): length = max(len(self.doctags), self.count) if self.mapfile_path: - self.doctag_syn0 = np_memmap(self.mapfile_path + '.doctag_syn0', dtype=REAL, - mode='w+', shape=(length, model.vector_size)) - self.doctag_syn0_lockf = np_memmap(self.mapfile_path + '.doctag_syn0_lockf', dtype=REAL, - mode='w+', shape=(length,)) + self.doctag_syn0 = np_memmap(self.mapfile_path + '.doctag_syn0', dtype=REAL, mode='w+', shape=(length, model.vector_size)) + self.doctag_syn0_lockf = np_memmap(self.mapfile_path + '.doctag_syn0_lockf', dtype=REAL, mode='w+', shape=(length,)) self.doctag_syn0_lockf.fill(1.0) else: self.doctag_syn0 = empty((length, model.vector_size), dtype=REAL) @@ -481,7 +480,11 @@ def most_similar(self, positive=None, negative=None, topn=10, clip_start=0, clip return dists best = matutils.argsort(dists, topn=topn + len(all_docs), reverse=True) # ignore (don't return) docs from the input - result = [(self.index_to_doctag(sim + clip_start), float(dists[sim])) for sim in best if (sim + clip_start) not in all_docs] + result = [ + (self.index_to_doctag(sim + clip_start), float(dists[sim])) + for sim in best + if (sim + clip_start) not in all_docs + ] return result[:topn] def doesnt_match(self, docs): @@ -494,7 +497,7 @@ def doesnt_match(self, docs): self.init_sims() docs = [doc for doc in docs if doc in self.doctags or 0 <= doc < self.count] # filter out unknowns - logger.debug("using docs %s" % docs) + logger.debug("using docs %s", docs) if not docs: raise ValueError("cannot select a doc from an empty list") vectors = vstack(self.doctag_syn0norm[self._int_index(doc)] for doc in docs).astype(REAL) @@ -551,8 +554,7 @@ def repeat(self, word_count): class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" - def __init__(self, documents=None, dm_mean=None, - dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, + def __init__(self, documents=None, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, docvecs=None, docvecs_mapfile=None, comment=None, trim_rule=None, **kwargs): """ Initialize the model from an iterable of `documents`. Each document is a @@ -664,7 +666,7 @@ def reset_weights(self): if self.dm and self.dm_concat: # expand l1 size to match concatenated tags+words length self.layer1_size = (self.dm_tag_count + (2 * self.window)) * self.vector_size - logger.info("using concatenative %d-dimensional layer1" % (self.layer1_size)) + logger.info("using concatenative %d-dimensional layer1", self.layer1_size) super(Doc2Vec, self).reset_weights() self.docvecs.reset_weights(self) @@ -686,14 +688,16 @@ def scan_vocab(self, documents, progress_per=10000, trim_rule=None, update=False if not checked_string_types: if isinstance(document.words, string_types): logger.warning( - "Each 'words' should be a list of words (usually unicode strings)." - "First 'words' here is instead plain %s." % type(document.words) + "Each 'words' should be a list of words (usually unicode strings). 
First 'words' here is instead plain %s.", + type(document.words) ) checked_string_types += 1 if document_no % progress_per == 0: interval_rate = (total_words - interval_count) / (default_timer() - interval_start) - logger.info("PROGRESS: at example #%i, processed %i words (%i/s), %i word types, %i tags", - document_no, total_words, interval_rate, len(vocab), len(self.docvecs)) + logger.info( + "PROGRESS: at example #%i, processed %i words (%i/s), %i word types, %i tags", + document_no, total_words, interval_rate, len(vocab), len(self.docvecs) + ) interval_start = default_timer() interval_count = total_words document_length = len(document.words) @@ -709,8 +713,10 @@ def scan_vocab(self, documents, progress_per=10000, trim_rule=None, update=False utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) min_reduce += 1 - logger.info("collected %i word types and %i unique tags from a corpus of %i examples and %i words", - len(vocab), len(self.docvecs), document_no + 1, total_words) + logger.info( + "collected %i word types and %i unique tags from a corpus of %i examples and %i words", + len(vocab), len(self.docvecs), document_no + 1, total_words + ) self.corpus_count = document_no + 1 self.raw_vocab = vocab @@ -721,15 +727,20 @@ def _do_train_job(self, job, alpha, inits): indexed_doctags = self.docvecs.indexed_doctags(doc.tags) doctag_indexes, doctag_vectors, doctag_locks, ignored = indexed_doctags if self.sg: - tally += train_document_dbow(self, doc.words, doctag_indexes, alpha, work, - train_words=self.dbow_words, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + tally += train_document_dbow( + self, doc.words, doctag_indexes, alpha, work, train_words=self.dbow_words, + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks + ) elif self.dm_concat: - tally += train_document_dm_concat(self, doc.words, doctag_indexes, alpha, work, neu1, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + tally += train_document_dm_concat( + self, doc.words, doctag_indexes, alpha, work, neu1, + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks + ) else: - tally += train_document_dm(self, doc.words, doctag_indexes, alpha, work, neu1, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + tally += train_document_dm( + self, doc.words, doctag_indexes, alpha, work, neu1, + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks + ) self.docvecs.trained_item(indexed_doctags) return tally, self._raw_word_count(job) @@ -754,17 +765,20 @@ def infer_vector(self, doc_words, alpha=0.1, min_alpha=0.0001, steps=5): for i in range(steps): if self.sg: - train_document_dbow(self, doc_words, doctag_indexes, alpha, work, - learn_words=False, learn_hidden=False, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + train_document_dbow( + self, doc_words, doctag_indexes, alpha, work, + learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks + ) elif self.dm_concat: - train_document_dm_concat(self, doc_words, doctag_indexes, alpha, work, neu1, - learn_words=False, learn_hidden=False, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + train_document_dm_concat( + self, doc_words, doctag_indexes, alpha, work, neu1, + learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks + ) else: - train_document_dm(self, doc_words, doctag_indexes, alpha, work, neu1, - learn_words=False, learn_hidden=False, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + train_document_dm( + self, doc_words, 
doctag_indexes, alpha, work, neu1, + learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks + ) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha return doctag_vectors[0] @@ -850,7 +864,7 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='* with utils.smart_open(fname, 'ab') as fout: if not word_vec: total_vec = len(self.docvecs) - logger.info("storing %sx%s projection weights into %s" % (total_vec, self.vector_size, fname)) + logger.info("storing %sx%s projection weights into %s", total_vec, self.vector_size, fname) fout.write(utils.to_utf8("%s %s\n" % (total_vec, self.vector_size))) # store as in input order for i in range(len(self.docvecs)): diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 2c0b2c5655..c4fee9ed0d 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -640,7 +640,7 @@ def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, window_indexes[n] = null_word_index else: window_indexes[n] = indexes[m] - n = n + 1 + n += 1 for m in range(2 * window): memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index b26c8fa639..ebae837b57 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -196,7 +196,7 @@ def inference(self, chunk): raise RuntimeError("model must be trained to perform inference") chunk = list(chunk) if len(chunk) > 1: - logger.debug("performing inference on a chunk of %i documents" % len(chunk)) + logger.debug("performing inference on a chunk of %i documents", len(chunk)) gamma = np.zeros((len(chunk), self.lda_beta.shape[0])) for d, doc in enumerate(chunk): @@ -214,8 +214,7 @@ def __getitem__(self, bow, eps=0.01): gamma = self.inference([bow])[0] topic_dist = gamma / sum(gamma) if sum(gamma) != 0 else [] - return [(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist) - if topicvalue >= eps] + return [(topicid, topicvalue) for topicid, topicvalue in enumerate(topic_dist) if topicvalue >= eps] def update(self, corpus): save_freq = max(1, int(10000 / self.chunksize)) # save every 10k docs, roughly @@ -265,7 +264,7 @@ def update_chunk(self, chunk, update=True, opt_o=True): unique_words[word_id] = len(unique_words) word_list.append(word_id) - Wt = len(word_list) # length of words in these documents + wt = len(word_list) # length of words in these documents # ...and do the lazy updates on the necessary columns of lambda rw = np.array([self.m_r[t] for t in self.m_timestamp[word_list]]) @@ -274,7 +273,7 @@ def update_chunk(self, chunk, update=True, opt_o=True): psi(self.m_eta + self.m_lambda[:, word_list]) - \ psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis]) - ss = SuffStats(self.m_T, Wt, len(chunk)) + ss = SuffStats(self.m_T, wt, len(chunk)) Elogsticks_1st = expect_log_sticks(self.m_var_sticks) # global sticks @@ -285,19 +284,19 @@ def update_chunk(self, chunk, update=True, opt_o=True): if len(doc) > 0: doc_word_ids, doc_word_counts = zip(*doc) doc_score = self.doc_e_step( - doc, ss, Elogsticks_1st, - word_list, unique_words, doc_word_ids, - doc_word_counts, self.m_var_converge) + ss, Elogsticks_1st, + unique_words, doc_word_ids, + doc_word_counts, self.m_var_converge + ) count += sum(doc_word_counts) score += doc_score if update: self.update_lambda(ss, word_list, opt_o) - return (score, count) + return score, 
count - def doc_e_step(self, doc, ss, Elogsticks_1st, word_list, - unique_words, doc_word_ids, doc_word_counts, var_converge): + def doc_e_step(self, ss, Elogsticks_1st, unique_words, doc_word_ids, doc_word_counts, var_converge): """ e step for a single doc """ @@ -392,8 +391,7 @@ def update_lambda(self, sstats, word_list, opt_o): self.m_rhot = rhot # Update appropriate columns of lambda based on documents. - self.m_lambda[:, word_list] = self.m_lambda[:, word_list] * (1 - rhot) + \ - rhot * self.m_D * sstats.m_var_beta_ss / sstats.m_chunksize + self.m_lambda[:, word_list] = self.m_lambda[:, word_list] * (1 - rhot) + rhot * self.m_D * sstats.m_var_beta_ss / sstats.m_chunksize self.m_lambda_sum = (1 - rhot) * self.m_lambda_sum + \ rhot * self.m_D * np.sum(sstats.m_var_beta_ss, axis=1) / sstats.m_chunksize @@ -401,8 +399,7 @@ def update_lambda(self, sstats, word_list, opt_o): self.m_timestamp[word_list] = self.m_updatect self.m_r.append(self.m_r[-1] + np.log(1 - rhot)) - self.m_varphi_ss = (1.0 - rhot) * self.m_varphi_ss + rhot * \ - sstats.m_var_sticks_ss * self.m_D / sstats.m_chunksize + self.m_varphi_ss = (1.0 - rhot) * self.m_varphi_ss + rhot * sstats.m_var_sticks_ss * self.m_D / sstats.m_chunksize if opt_o: self.optimal_ordering() @@ -431,10 +428,8 @@ def update_expectations(self): topics we've learned we'll get the correct behavior. """ for w in xrange(self.m_W): - self.m_lambda[:, w] *= np.exp(self.m_r[-1] - - self.m_r[self.m_timestamp[w]]) - self.m_Elogbeta = psi(self.m_eta + self.m_lambda) - \ - psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis]) + self.m_lambda[:, w] *= np.exp(self.m_r[-1] - self.m_r[self.m_timestamp[w]]) + self.m_Elogbeta = psi(self.m_eta + self.m_lambda) - psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis]) self.m_timestamp[:] = self.m_updatect self.m_status_up_to_date = True @@ -448,8 +443,10 @@ def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=No """ if num_words is not None: # deprecated num_words is used - logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.") - logger.warning("Please use topn instead.") + logger.warning( + "The parameter num_words for show_topic() would be deprecated in the updated version. " + "Please use topn instead." 
+ ) topn = num_words if not self.m_status_up_to_date: @@ -492,7 +489,7 @@ def save_topics(self, doc_count=None): else: fname = 'doc-%i' % doc_count fname = '%s/%s.topics' % (self.outputdir, fname) - logger.info("saving topics to %s" % fname) + logger.info("saving topics to %s", fname) betas = self.m_lambda + self.m_eta np.savetxt(fname, betas) @@ -527,13 +524,12 @@ def hdp_to_lda(self): alpha[i] = sticks[i] * left left = left - alpha[i] alpha[self.m_T - 1] = left - alpha = alpha * self.m_alpha + alpha *= self.m_alpha # beta - beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + - self.m_lambda_sum[:, np.newaxis]) + beta = (self.m_lambda + self.m_eta) / (self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis]) - return (alpha, beta) + return alpha, beta def suggested_lda_model(self): """ @@ -560,12 +556,14 @@ def evaluate_test_corpus(self, corpus): lda_betad = self.lda_beta[:, doc_word_ids] log_predicts = np.log(np.dot(theta, lda_betad)) doc_score = sum(log_predicts) / len(doc) - logger.info('TEST: %6d %.5f' % (i, doc_score)) + logger.info('TEST: %6d %.5f', i, doc_score) score += likelihood total_words += sum(doc_word_counts) - logger.info('TEST: average score: %.5f, total score: %.5f, test docs: %d' % (score / total_words, score, len(corpus))) + logger.info( + "TEST: average score: %.5f, total score: %.5f, test docs: %d", + score / total_words, score, len(corpus) + ) return score -# endclass HdpModel class HdpTopicFormatter(object): @@ -627,14 +625,20 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): def print_topic(self, topic_id, topn=None, num_words=None): if num_words is not None: # deprecated num_words is used - warnings.warn("The parameter num_words for print_topic() would be deprecated in the updated version. Please use topn instead.") + warnings.warn( + "The parameter num_words for print_topic() would be deprecated in the updated version. " + "Please use topn instead." + ) topn = num_words return self.show_topic(topic_id, topn, formatted=True) def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None,): if num_words is not None: # deprecated num_words is used - warnings.warn("The parameter num_words for show_topic() would be deprecated in the updated version. Please use topn instead.") + warnings.warn( + "The parameter num_words for show_topic() would be deprecated in the updated version. " + "Please use topn instead." 
+ ) topn = num_words lambdak = list(self.data[topic_id, :]) @@ -668,4 +672,3 @@ def format_topic(self, topic_id, topic_terms): fmt = (topic_id, fmt) return fmt -# endclass HdpTopicFormatter diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index c2dfd419cb..75cc11ee69 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -146,11 +146,11 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None) total_vec = len(self.vocab) vector_size = self.syn0.shape[1] if fvocab is not None: - logger.info("storing vocabulary in %s" % (fvocab)) + logger.info("storing vocabulary in %s", fvocab) with utils.smart_open(fvocab, 'wb') as vout: for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count))) - logger.info("storing %sx%s projection weights into %s" % (total_vec, vector_size, fname)) + logger.info("storing %sx%s projection weights into %s", total_vec, vector_size, fname) assert (len(self.vocab), vector_size) == self.syn0.shape with utils.smart_open(fname, 'wb') as fout: fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size))) @@ -205,7 +205,7 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', logger.info("loading projection weights from %s", fname) with utils.smart_open(fname) as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) - vocab_size, vector_size = map(int, header.split()) # throws for invalid file format + vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format if limit: vocab_size = min(vocab_size, limit) result = cls() @@ -232,7 +232,7 @@ def add_word(word, weights): if binary: binary_len = dtype(REAL).itemsize * vector_size - for line_no in xrange(vocab_size): + for _ in xrange(vocab_size): # mixed text and binary: read text first, then binary word = [] while True: @@ -253,8 +253,8 @@ def add_word(word, weights): raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?") parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") if len(parts) != vector_size + 1: - raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no)) - word, weights = parts[0], list(map(REAL, parts[1:])) + raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no) + word, weights = parts[0], [REAL(x) for x in parts[1:]] add_word(word, weights) if result.syn0.shape[0] != len(result.vocab): logger.info( @@ -264,7 +264,7 @@ def add_word(word, weights): result.syn0 = ascontiguousarray(result.syn0[: len(result.vocab)]) assert (len(result.vocab), vector_size) == result.syn0.shape - logger.info("loaded %s matrix from %s" % (result.syn0.shape, fname)) + logger.info("loaded %s matrix from %s", result.syn0.shape, fname) return result def word_vec(self, word, use_norm=False): @@ -400,12 +400,13 @@ def wmdistance(self, document1, document2): diff1 = len_pre_oov1 - len(document1) diff2 = len_pre_oov2 - len(document2) if diff1 > 0 or diff2 > 0: - logger.info('Removed %d and %d OOV words from document 1 and 2 (respectively).', - diff1, diff2) + logger.info('Removed %d and %d OOV words from document 1 and 2 (respectively).', diff1, diff2) if len(document1) == 0 or len(document2) == 0: - logger.info('At least one of the documents had no words that were' - 'in the vocabulary. 
Aborting (returning inf).') + logger.info( + "At least one of the documents had no words that were in the vocabulary. " + "Aborting (returning inf)." + ) return float('inf') dictionary = Dictionary(documents=[document1, document2]) @@ -481,8 +482,10 @@ def most_similar_cosmul(self, positive=None, negative=None, topn=10): # allow calls like most_similar_cosmul('dog'), as a shorthand for most_similar_cosmul(['dog']) positive = [positive] - all_words = set([self.vocab[word].index for word in positive + negative - if not isinstance(word, ndarray) and word in self.vocab]) + all_words = { + self.vocab[word].index for word in positive + negative + if not isinstance(word, ndarray) and word in self.vocab + } positive = [ self.word_vec(word, use_norm=True) if isinstance(word, string_types) else word @@ -638,16 +641,16 @@ def n_similarity(self, ws1, ws2): raise ZeroDivisionError('Atleast one of the passed list is empty.') v1 = [self[word] for word in ws1] v2 = [self[word] for word in ws2] - return dot(matutils.unitvec(array(v1).mean(axis=0)), - matutils.unitvec(array(v2).mean(axis=0))) + return dot(matutils.unitvec(array(v1).mean(axis=0)), matutils.unitvec(array(v2).mean(axis=0))) @staticmethod def log_accuracy(section): correct, incorrect = len(section['correct']), len(section['incorrect']) if correct + incorrect > 0: - logger.info("%s: %.1f%% (%i/%i)" % - (section['section'], 100.0 * correct / (correct + incorrect), - correct, correct + incorrect)) + logger.info( + "%s: %.1f%% (%i/%i)", + section['section'], 100.0 * correct / (correct + incorrect), correct, correct + incorrect + ) def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, case_insensitive=True): """ @@ -672,7 +675,7 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c """ ok_vocab = [(w, self.vocab[w]) for w in self.index2word[:restrict_vocab]] - ok_vocab = dict((w.upper(), v) for w, v in reversed(ok_vocab)) if case_insensitive else dict(ok_vocab) + ok_vocab = {w.upper(): v for w, v in reversed(ok_vocab)} if case_insensitive else dict(ok_vocab) sections, section = [], None for line_no, line in enumerate(utils.smart_open(questions)): @@ -692,16 +695,16 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c a, b, c, expected = [word.upper() for word in line.split()] else: a, b, c, expected = [word for word in line.split()] - except Exception: - logger.info("skipping invalid line #%i in %s" % (line_no, questions)) + except ValueError: + logger.info("skipping invalid line #%i in %s", line_no, questions) continue if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: - logger.debug("skipping line #%i with OOV words: %s" % (line_no, line.strip())) + logger.debug("skipping line #%i with OOV words: %s", line_no, line.strip()) continue original_vocab = self.vocab self.vocab = ok_vocab - ignore = set([a, b, c]) # input words to be ignored + ignore = {a, b, c} # input words to be ignored predicted = None # find the most likely prediction, ignoring OOV words and input words sims = most_similar(self, positive=[b, c], negative=[a], topn=False, restrict_vocab=restrict_vocab) @@ -736,8 +739,7 @@ def log_evaluate_word_pairs(pearson, spearman, oov, pairs): logger.info('Spearman rank-order correlation coefficient against %s: %.4f', pairs, spearman[0]) logger.info('Pairs with unknown words ratio: %.1f%%', oov) - def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True, - dummy4unknown=False): + 
def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True, dummy4unknown=False): """ Compute correlation of the model with human similarity judgments. `pairs` is a filename of a dataset where lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter`. @@ -762,7 +764,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case Otherwise (default False), these pairs are skipped entirely. """ ok_vocab = [(w, self.vocab[w]) for w in self.index2word[:restrict_vocab]] - ok_vocab = dict((w.upper(), v) for w, v in reversed(ok_vocab)) if case_insensitive else dict(ok_vocab) + ok_vocab = {w.upper(): v for w, v in reversed(ok_vocab)} if case_insensitive else dict(ok_vocab) similarity_gold = [] similarity_model = [] @@ -783,7 +785,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case else: a, b, sim = [word for word in line.split(delimiter)] sim = float(sim) - except Exception: + except (ValueError, TypeError): logger.info('skipping invalid line #%d in %s', line_no, pairs) continue if a not in ok_vocab or b not in ok_vocab: @@ -802,15 +804,12 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case pearson = stats.pearsonr(similarity_gold, similarity_model) oov_ratio = float(oov) / (len(similarity_gold) + oov) * 100 - logger.debug( - 'Pearson correlation coefficient against %s: %f with p-value %f', - pairs, pearson[0], pearson[1] - ) + logger.debug('Pearson correlation coefficient against %s: %f with p-value %f', pairs, pearson[0], pearson[1]) logger.debug( 'Spearman rank-order correlation coefficient against %s: %f with p-value %f', pairs, spearman[0], spearman[1] ) - logger.debug('Pairs with unknown words: %d' % oov) + logger.debug('Pairs with unknown words: %d', oov) self.log_evaluate_word_pairs(pearson, spearman, oov_ratio, pairs) return pearson, spearman, oov_ratio @@ -844,5 +843,8 @@ def get_embedding_layer(self, train_embeddings=False): # set `trainable` as `False` to use the pretrained word embedding # No extra mem usage here as `Embedding` layer doesn't create any new matrix for weights - layer = Embedding(input_dim=weights.shape[0], output_dim=weights.shape[1], weights=[weights], trainable=train_embeddings) + layer = Embedding( + input_dim=weights.shape[0], output_dim=weights.shape[1], + weights=[weights], trainable=train_embeddings + ) return layer diff --git a/gensim/models/lda_dispatcher.py b/gensim/models/lda_dispatcher.py index 91e7f237c7..6b3bd53c44 100755 --- a/gensim/models/lda_dispatcher.py +++ b/gensim/models/lda_dispatcher.py @@ -85,11 +85,11 @@ def initialize(self, **model_params): worker = Pyro4.Proxy(uri) workerid = len(self.workers) # make time consuming methods work asynchronously - logger.info("registering worker #%i at %s" % (workerid, uri)) + logger.info("registering worker #%i at %s", workerid, uri) worker.initialize(workerid, dispatcher=self.callback, **model_params) self.workers[workerid] = worker except Pyro4.errors.PyroError: - logger.warning("unresponsive worker at %s, deleting it from the name server" % uri) + logger.warning("unresponsive worker at %s, deleting it from the name server", uri) ns.remove(name) if not self.workers: @@ -104,16 +104,16 @@ def getworkers(self): @Pyro4.expose def getjob(self, worker_id): - logger.info("worker #%i requesting a new job" % worker_id) + logger.info("worker #%i requesting a new job", worker_id) job = self.jobs.get(block=True, timeout=1) - logger.info("worker #%i got 
a new job (%i left)" % (worker_id, self.jobs.qsize())) + logger.info("worker #%i got a new job (%i left)", worker_id, self.jobs.qsize()) return job @Pyro4.expose def putjob(self, job): self._jobsreceived += 1 self.jobs.put(job, block=True, timeout=HUGE_TIMEOUT) - logger.info("added a new job (len(queue)=%i items)" % self.jobs.qsize()) + logger.info("added a new job (len(queue)=%i items)", self.jobs.qsize()) @Pyro4.expose def getstate(self): @@ -121,11 +121,11 @@ def getstate(self): Merge states from across all workers and return the result. """ logger.info("end of input, assigning all remaining jobs") - logger.debug("jobs done: %s, jobs received: %s" % (self._jobsdone, self._jobsreceived)) + logger.debug("jobs done: %s, jobs received: %s", self._jobsdone, self._jobsreceived) while self._jobsdone < self._jobsreceived: time.sleep(0.5) # check every half a second - logger.info("merging states from %i workers" % len(self.workers)) + logger.info("merging states from %i workers", len(self.workers)) workers = list(self.workers.values()) result = workers[0].getstate() for worker in workers[1:]: @@ -140,7 +140,7 @@ def reset(self, state): Initialize all workers for a new EM iterations. """ for workerid, worker in iteritems(self.workers): - logger.info("resetting worker %s" % workerid) + logger.info("resetting worker %s", workerid) worker.reset(state) worker.requestjob() self._jobsdone = 0 @@ -158,7 +158,7 @@ def jobdone(self, workerid): and `worker.requestjob()`. """ self._jobsdone += 1 - logger.info("worker #%s finished job #%i" % (workerid, self._jobsdone)) + logger.info("worker #%s finished job #%i", workerid, self._jobsdone) self.workers[workerid].requestjob() # tell the worker to ask for another job, asynchronously (one-way) def jobsdone(self): @@ -171,7 +171,7 @@ def exit(self): Terminate all registered workers and then the dispatcher. 
""" for workerid, worker in iteritems(self.workers): - logger.info("terminating worker %s" % workerid) + logger.info("terminating worker %s", workerid) worker.exit() logger.info("terminating dispatcher") os._exit(0) # exit the whole process (not just this thread ala sys.exit()) @@ -180,27 +180,25 @@ def exit(self): def main(): parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--maxsize", help="How many jobs (=chunks of N documents) " - "to keep 'pre-fetched' in a queue (default: %(default)s)", - type=int, default=MAX_JOBS_QUEUE) + parser.add_argument("--maxsize", help="How many jobs (=chunks of N documents) to keep 'pre-fetched' in a queue (default: %(default)s)", type=int, default=MAX_JOBS_QUEUE) parser.add_argument("--host", help="Nameserver hostname (default: %(default)s)", default=None) parser.add_argument("--port", help="Nameserver port (default: %(default)s)", default=None, type=int) parser.add_argument("--no-broadcast", help="Disable broadcast (default: %(default)s)", action='store_const', default=True, const=False) parser.add_argument("--hmac", help="Nameserver hmac key (default: %(default)s)", default=None) - parser.add_argument('-v', '--verbose', help='Verbose flag', action='store_const', dest="loglevel", - const=logging.INFO, default=logging.WARNING) + parser.add_argument('-v', '--verbose', help='Verbose flag', action='store_const', dest="loglevel", const=logging.INFO, default=logging.WARNING) args = parser.parse_args() logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=args.loglevel) logger.info("running %s", " ".join(sys.argv)) - ns_conf = {"broadcast": args.no_broadcast, - "host": args.host, - "port": args.port, - "hmac_key": args.hmac} + ns_conf = { + "broadcast": args.no_broadcast, + "host": args.host, + "port": args.port, + "hmac_key": args.hmac + } utils.pyro_daemon(LDA_DISPATCHER_PREFIX, Dispatcher(maxsize=args.maxsize, ns_conf=ns_conf), ns_conf=ns_conf) - logger.info("finished running %s", " ".join(sys.argv)) diff --git a/gensim/models/lda_worker.py b/gensim/models/lda_worker.py index ec87c29148..8656672db6 100755 --- a/gensim/models/lda_worker.py +++ b/gensim/models/lda_worker.py @@ -50,7 +50,7 @@ def initialize(self, myid, dispatcher, **model_params): self.myid = myid # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? 
self.dispatcher = dispatcher self.finished = False - logger.info("initializing worker #%s" % myid) + logger.info("initializing worker #%s", myid) self.model = ldamodel.LdaModel(**model_params) @Pyro4.expose @@ -70,27 +70,26 @@ def requestjob(self): # no new job: try again, unless we're finished with all work continue if job is not None: - logger.info("worker #%s received job #%i" % (self.myid, self.jobsdone)) + logger.info("worker #%s received job #%i", self.myid, self.jobsdone) self.processjob(job) self.dispatcher.jobdone(self.myid) else: - logger.info("worker #%i stopping asking for jobs" % self.myid) + logger.info("worker #%i stopping asking for jobs", self.myid) @utils.synchronous('lock_update') def processjob(self, job): - logger.debug("starting to process job #%i" % self.jobsdone) + logger.debug("starting to process job #%i", self.jobsdone) self.model.do_estep(job) self.jobsdone += 1 if SAVE_DEBUG and self.jobsdone % SAVE_DEBUG == 0: fname = os.path.join(tempfile.gettempdir(), 'lda_worker.pkl') self.model.save(fname) - logger.info("finished processing job #%i" % (self.jobsdone - 1)) + logger.info("finished processing job #%i", self.jobsdone - 1) @Pyro4.expose @utils.synchronous('lock_update') def getstate(self): - logger.info("worker #%i returning its state after %s jobs" % - (self.myid, self.jobsdone)) + logger.info("worker #%i returning its state after %s jobs", self.myid, self.jobsdone) result = self.model.state assert isinstance(result, ldamodel.LdaState) self.model.clear() # free up mem in-between two EM cycles @@ -101,7 +100,7 @@ def getstate(self): @utils.synchronous('lock_update') def reset(self, state): assert state is not None - logger.info("resetting worker #%i" % self.myid) + logger.info("resetting worker #%i", self.myid) self.model.state = state self.model.sync_state() self.model.state.reset() @@ -109,32 +108,29 @@ def reset(self, state): @Pyro4.oneway def exit(self): - logger.info("terminating worker #%i" % self.myid) + logger.info("terminating worker #%i", self.myid) os._exit(0) -# endclass Worker def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--host", help="Nameserver hostname (default: %(default)s)", default=None) parser.add_argument("--port", help="Nameserver port (default: %(default)s)", default=None, type=int) - parser.add_argument("--no-broadcast", help="Disable broadcast (default: %(default)s)", - action='store_const', default=True, const=False) + parser.add_argument("--no-broadcast", help="Disable broadcast (default: %(default)s)", action='store_const', default=True, const=False) parser.add_argument("--hmac", help="Nameserver hmac key (default: %(default)s)", default=None) - parser.add_argument('-v', '--verbose', help='Verbose flag', action='store_const', dest="loglevel", - const=logging.INFO, default=logging.WARNING) + parser.add_argument('-v', '--verbose', help='Verbose flag', action='store_const', dest="loglevel", const=logging.INFO, default=logging.WARNING) args = parser.parse_args() logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=args.loglevel) logger.info("running %s", " ".join(sys.argv)) - ns_conf = {"broadcast": args.no_broadcast, - "host": args.host, - "port": args.port, - "hmac_key": args.hmac} - + ns_conf = { + "broadcast": args.no_broadcast, + "host": args.host, + "port": args.port, + "hmac_key": args.hmac + } utils.pyro_daemon(LDA_WORKER_PREFIX, Worker(), random_suffix=True, ns_conf=ns_conf) - logger.info("finished running %s", " ".join(sys.argv)) diff --git 
a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 9ed01d84c8..91c4c450a6 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -48,12 +48,7 @@ from gensim.models.callbacks import Callback # log(sum(exp(x))) that tries to avoid overflow -try: - # try importing from here if older scipy is installed - from scipy.maxentropy import logsumexp -except ImportError: - # maxentropy has been removed in recent releases, logsumexp now in misc - from scipy.misc import logsumexp +from scipy.misc import logsumexp logger = logging.getLogger('gensim.models.ldamodel') @@ -147,8 +142,7 @@ def blend(self, rhot, other, targetsize=None): if other.numdocs == 0 or targetsize == other.numdocs: scale = 1.0 else: - logger.info("merging changes from %i documents into a model of %i documents", - other.numdocs, targetsize) + logger.info("merging changes from %i documents into a model of %i documents", other.numdocs, targetsize) scale = 1.0 * targetsize / other.numdocs self.sstats += rhot * scale * other.sstats @@ -288,7 +282,8 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, self.alpha, self.optimize_alpha = self.init_dir_prior(alpha, 'alpha') - assert self.alpha.shape == (self.num_topics,), "Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics) + assert self.alpha.shape == (self.num_topics,), \ + "Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics) if isinstance(eta, six.string_types): if eta == 'asymmetric': @@ -324,10 +319,12 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, from gensim.models.lda_dispatcher import LDA_DISPATCHER_PREFIX self.dispatcher = Pyro4.Proxy(ns.list(prefix=LDA_DISPATCHER_PREFIX)[LDA_DISPATCHER_PREFIX]) logger.debug("looking for dispatcher at %s" % str(self.dispatcher._pyroUri)) - self.dispatcher.initialize(id2word=self.id2word, num_topics=self.num_topics, - chunksize=chunksize, alpha=alpha, eta=eta, distributed=False) + self.dispatcher.initialize( + id2word=self.id2word, num_topics=self.num_topics, chunksize=chunksize, + alpha=alpha, eta=eta, distributed=False + ) self.numworkers = len(self.dispatcher.getworkers()) - logger.info("using distributed version with %i workers" % self.numworkers) + logger.info("using distributed version with %i workers", self.numworkers) except Exception as err: logger.error("failed to initialize distributed LDA (%s)", err) raise RuntimeError("failed to initialize distributed LDA (%s)" % err) @@ -382,8 +379,9 @@ def init_dir_prior(self, prior, name): return init_prior, is_auto def __str__(self): - return "LdaModel(num_terms=%s, num_topics=%s, decay=%s, chunksize=%s)" % \ - (self.num_terms, self.num_topics, self.decay, self.chunksize) + return "LdaModel(num_terms=%s, num_topics=%s, decay=%s, chunksize=%s)" % ( + self.num_terms, self.num_topics, self.decay, self.chunksize + ) def sync_state(self): self.expElogbeta = np.exp(self.state.get_Elogbeta()) @@ -412,8 +410,8 @@ def inference(self, chunk, collect_sstats=False): """ try: - _ = len(chunk) - except Exception: + len(chunk) + except TypeError: # convert iterators/generators to plain list, so we have len() etc. 
chunk = list(chunk) if len(chunk) > 1: @@ -436,9 +434,9 @@ def inference(self, chunk, collect_sstats=False): for d, doc in enumerate(chunk): if len(doc) > 0 and not isinstance(doc[0][0], six.integer_types + (np.integer,)): # make sure the term IDs are ints, otherwise np will get upset - ids = [int(id) for id, _ in doc] + ids = [int(idx) for idx, _ in doc] else: - ids = [id for id, _ in doc] + ids = [idx for idx, _ in doc] cts = np.array([cnt for _, cnt in doc]) gammad = gamma[d, :] Elogthetad = Elogtheta[d, :] @@ -462,7 +460,7 @@ def inference(self, chunk, collect_sstats=False): phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100 # If gamma hasn't changed much, we're done. meanchange = np.mean(abs(gammad - lastgamma)) - if (meanchange < self.gamma_threshold): + if meanchange < self.gamma_threshold: converged += 1 break gamma[d, :] = gammad @@ -472,8 +470,7 @@ def inference(self, chunk, collect_sstats=False): sstats[:, ids] += np.outer(expElogthetad.T, cts / phinorm) if len(chunk) > 1: - logger.debug("%i/%i documents converged within %i iterations", - converged, len(chunk), self.iterations) + logger.debug("%i/%i documents converged within %i iterations", converged, len(chunk), self.iterations) if collect_sstats: # This step finishes computing the sufficient statistics for the @@ -533,8 +530,10 @@ def log_perplexity(self, chunk, total_docs=None): corpus_words = sum(cnt for document in chunk for _, cnt in document) subsample_ratio = 1.0 * total_docs / len(chunk) perwordbound = self.bound(chunk, subsample_ratio=subsample_ratio) / (subsample_ratio * corpus_words) - logger.info("%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words" % - (perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words)) + logger.info( + "%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words", + perwordbound, np.exp2(-perwordbound), len(chunk), corpus_words + ) return perwordbound def update(self, corpus, chunksize=None, decay=None, offset=None, @@ -621,12 +620,14 @@ def update(self, corpus, chunksize=None, decay=None, offset=None, "iterating %ix with a convergence threshold of %f", updatetype, self.num_topics, passes, lencorpus, updateafter, evalafter, iterations, - gamma_threshold) + gamma_threshold + ) if updates_per_pass * passes < 10: logger.warning( - "too few updates, training might not converge; consider " - "increasing the number of passes or iterations to improve accuracy") + "too few updates, training might not converge; " + "consider increasing the number of passes or iterations to improve accuracy" + ) # rho is the "speed" of updating; TODO try other fncs # pass_ + num_updates handles increasing the starting t for each pass, @@ -643,7 +644,7 @@ def rho(): for pass_ in xrange(passes): if self.dispatcher: - logger.info('initializing %s workers' % self.numworkers) + logger.info('initializing %s workers', self.numworkers) self.dispatcher.reset(self.state) else: other = LdaState(self.eta, self.state.sstats.shape) @@ -658,13 +659,17 @@ def rho(): if self.dispatcher: # add the chunk to dispatcher's job queue, so workers can munch on it - logger.info('PROGRESS: pass %i, dispatching documents up to #%i/%i', - pass_, chunk_no * chunksize + len(chunk), lencorpus) + logger.info( + "PROGRESS: pass %i, dispatching documents up to #%i/%i", + pass_, chunk_no * chunksize + len(chunk), lencorpus + ) # this will eventually block until some jobs finish, because the queue has a small finite length 
self.dispatcher.putjob(chunk) else: - logger.info('PROGRESS: pass %i, at document #%i/%i', - pass_, chunk_no * chunksize + len(chunk), lencorpus) + logger.info( + "PROGRESS: pass %i, at document #%i/%i", + pass_, chunk_no * chunksize + len(chunk), lencorpus + ) gammat = self.do_estep(chunk, other) if self.optimize_alpha: @@ -870,7 +875,7 @@ def get_topic_terms(self, topicid, topn=10): topic = self.get_topics()[topicid] topic = topic / topic.sum() # normalize to probability distribution bestn = matutils.argsort(topic, topn, reverse=True) - return [(id, topic[id]) for id in bestn] + return [(idx, topic[idx]) for idx in bestn] def top_topics(self, corpus=None, texts=None, dictionary=None, window_size=None, coherence='u_mass', topn=20, processes=-1): @@ -888,7 +893,8 @@ def top_topics(self, corpus=None, texts=None, dictionary=None, window_size=None, cm = CoherenceModel( model=self, corpus=corpus, texts=texts, dictionary=dictionary, window_size=window_size, coherence=coherence, topn=topn, - processes=processes) + processes=processes + ) coherence_scores = cm.get_coherence_per_topic() str_topics = [] @@ -995,7 +1001,8 @@ def get_term_topics(self, word_id, minimum_probability=None): return values - def diff(self, other, distance="kullback_leibler", num_words=100, n_ann_terms=10, diagonal=False, annotation=True, normed=True): + def diff(self, other, distance="kullback_leibler", num_words=100, + n_ann_terms=10, diagonal=False, annotation=True, normed=True): """ Calculate difference topic2topic between two Lda models `other` instances of `LdaMulticore` or `LdaModel` @@ -1049,7 +1056,8 @@ def diff(self, other, distance="kullback_leibler", num_words=100, n_ann_terms=10 d1, d2 = fst_topics, snd_topics if diagonal: - assert t1_size == t2_size, "Both input models should have same no. of topics, as the diagonal will only be valid in a square matrix" + assert t1_size == t2_size, \ + "Both input models should have same no. of topics, as the diagonal will only be valid in a square matrix" # initialize z and annotation array z = np.zeros(t1_size) if annotation: @@ -1096,7 +1104,7 @@ def __getitem__(self, bow, eps=None): """ return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics) - def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, **kwargs): + def save(self, fname, ignore=('state', 'dispatcher'), separately=None, *args, **kwargs): """ Save the model to file. 
@@ -1135,7 +1143,7 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, ** if isinstance(ignore, six.string_types): ignore = [ignore] ignore = [e for e in ignore if e] # make sure None and '' are not in the list - ignore = list(set(['state', 'dispatcher', 'id2word']) | set(ignore)) + ignore = list({'state', 'dispatcher', 'id2word'} | set(ignore)) else: ignore = ['state', 'dispatcher', 'id2word'] @@ -1188,10 +1196,9 @@ def load(cls, fname, *args, **kwargs): # check if `id2word_fname` file is present on disk # if present -> the model to be loaded was saved using a >= 0.13.2 version of Gensim, so set `result.id2word` using the `id2word_fname` file # if not present -> the model to be loaded was saved using a < 0.13.2 version of Gensim, so `result.id2word` already set after the main pickle load - if (os.path.isfile(id2word_fname)): + if os.path.isfile(id2word_fname): try: result.id2word = utils.unpickle(id2word_fname) except Exception as e: logging.warning("failed to load id2word dictionary from %s: %s", id2word_fname, e) return result -# endclass LdaModel diff --git a/gensim/models/ldamulticore.py b/gensim/models/ldamulticore.py index 39c3c40666..0c22c64f7c 100644 --- a/gensim/models/ldamulticore.py +++ b/gensim/models/ldamulticore.py @@ -143,11 +143,13 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None, if isinstance(alpha, six.string_types) and alpha == 'auto': raise NotImplementedError("auto-tuning alpha not implemented in multicore LDA; use plain LdaModel.") - super(LdaMulticore, self).__init__(corpus=corpus, num_topics=num_topics, + super(LdaMulticore, self).__init__( + corpus=corpus, num_topics=num_topics, id2word=id2word, chunksize=chunksize, passes=passes, alpha=alpha, eta=eta, decay=decay, offset=offset, eval_every=eval_every, iterations=iterations, gamma_threshold=gamma_threshold, random_state=random_state, minimum_probability=minimum_probability, - minimum_phi_value=minimum_phi_value, per_word_topics=per_word_topics) + minimum_phi_value=minimum_phi_value, per_word_topics=per_word_topics + ) def update(self, corpus, chunks_as_numpy=False): """ @@ -169,7 +171,7 @@ def update(self, corpus, chunks_as_numpy=False): """ try: lencorpus = len(corpus) - except Exception: + except TypeError: logger.warning("input corpus stream has no len(); counting documents") lencorpus = sum(1 for _ in corpus) if lencorpus == 0: @@ -187,15 +189,17 @@ def update(self, corpus, chunks_as_numpy=False): evalafter = min(lencorpus, (self.eval_every or 0) * updateafter) updates_per_pass = max(1, lencorpus / updateafter) - logger.info("running %s LDA training, %s topics, %i passes over the" - " supplied corpus of %i documents, updating every %i documents," - " evaluating every ~%i documents, iterating %ix with a convergence threshold of %f", - updatetype, self.num_topics, self.passes, lencorpus, updateafter, evalafter, - self.iterations, self.gamma_threshold) + logger.info( + "running %s LDA training, %s topics, %i passes over the supplied corpus of %i documents, " + "updating every %i documents, evaluating every ~%i documents, iterating %ix with a convergence threshold of %f", + updatetype, self.num_topics, self.passes, lencorpus, updateafter, evalafter, self.iterations, self.gamma_threshold + ) if updates_per_pass * self.passes < 10: - logger.warning("too few updates, training might not converge; consider " - "increasing the number of passes or iterations to improve accuracy") + logger.warning( + "too few updates, training might not converge; " + "consider 
increasing the number of passes or iterations to improve accuracy" + ) job_queue = Queue(maxsize=2 * self.workers) result_queue = Queue() @@ -240,9 +244,10 @@ def process_result_queue(force=False): job_queue.put((chunk_no, chunk, self), block=False, timeout=0.1) chunk_put = True queue_size[0] += 1 - logger.info('PROGRESS: pass %i, dispatched chunk #%i = ' - 'documents up to #%i/%i, outstanding queue size %i', - pass_, chunk_no, chunk_no * self.chunksize + len(chunk), lencorpus, queue_size[0]) + logger.info( + "PROGRESS: pass %i, dispatched chunk #%i = documents up to #%i/%i, outstanding queue size %i", + pass_, chunk_no, chunk_no * self.chunksize + len(chunk), lencorpus, queue_size[0] + ) except queue.Full: # in case the input job queue is full, keep clearing the # result queue, to make sure we don't deadlock diff --git a/gensim/models/ldaseqmodel.py b/gensim/models/ldaseqmodel.py index 6399e17aae..26709c04d6 100644 --- a/gensim/models/ldaseqmodel.py +++ b/gensim/models/ldaseqmodel.py @@ -50,8 +50,8 @@ class LdaSeqModel(utils.SaveLoad): """ def __init__(self, corpus=None, time_slice=None, id2word=None, alphas=0.01, num_topics=10, - initialize='gensim', sstats=None, lda_model=None, obs_variance=0.5, chain_variance=0.005, passes=10, - random_state=None, lda_inference_max_iter=25, em_min_iter=6, em_max_iter=20, chunksize=100): + initialize='gensim', sstats=None, lda_model=None, obs_variance=0.5, chain_variance=0.005, passes=10, + random_state=None, lda_inference_max_iter=25, em_min_iter=6, em_max_iter=20, chunksize=100): """ `corpus` is any iterable gensim corpus @@ -91,7 +91,7 @@ def __init__(self, corpus=None, time_slice=None, id2word=None, alphas=0.01, num_ if corpus is not None: try: self.corpus_len = len(corpus) - except Exception: + except TypeError: logger.warning("input corpus stream has no len(); counting documents") self.corpus_len = sum(1 for _ in corpus) @@ -113,7 +113,10 @@ def __init__(self, corpus=None, time_slice=None, id2word=None, alphas=0.01, num_ # the sslm class is described below and contains information on topic-word probabilities and doc-topic probabilities. self.topic_chains = [] for topic in range(0, num_topics): - sslm_ = sslm(num_time_slices=self.num_time_slices, vocab_len=self.vocab_len, num_topics=self.num_topics, chain_variance=chain_variance, obs_variance=obs_variance) + sslm_ = sslm( + num_time_slices=self.num_time_slices, vocab_len=self.vocab_len, num_topics=self.num_topics, + chain_variance=chain_variance, obs_variance=obs_variance + ) self.topic_chains.append(sslm_) # the following are class variables which are to be integrated during Document Influence Model @@ -125,7 +128,10 @@ def __init__(self, corpus=None, time_slice=None, id2word=None, alphas=0.01, num_ # if a corpus and time_slice is provided, depending on the user choice of initializing LDA, we start DTM. 
if corpus is not None and time_slice is not None: if initialize == 'gensim': - lda_model = ldamodel.LdaModel(corpus, id2word=self.id2word, num_topics=self.num_topics, passes=passes, alpha=self.alphas, random_state=random_state) + lda_model = ldamodel.LdaModel( + corpus, id2word=self.id2word, num_topics=self.num_topics, + passes=passes, alpha=self.alphas, random_state=random_state + ) self.sstats = np.transpose(lda_model.state.sstats) if initialize == 'ldamodel': self.sstats = np.transpose(lda_model.state.sstats) @@ -206,7 +212,7 @@ def fit_lda_seq(self, corpus, lda_inference_max_iter, em_min_iter, em_max_iter, topic_bound = self.fit_lda_seq_topics(topic_suffstats) bound += topic_bound - if ((bound - old_bound) < 0): + if (bound - old_bound) < 0: # if max_iter is too low, increase iterations. if lda_inference_max_iter < LOWER_ITER: lda_inference_max_iter *= ITER_MULT_LOW @@ -227,7 +233,8 @@ def fit_lda_seq(self, corpus, lda_inference_max_iter, em_min_iter, em_max_iter, return bound - def lda_seq_infer(self, corpus, topic_suffstats, gammas, lhoods, iter_, lda_inference_max_iter, chunksize): + def lda_seq_infer(self, corpus, topic_suffstats, gammas, lhoods, + iter_, lda_inference_max_iter, chunksize): """ Inference or E- Step. This is used to set up the gensim LdaModel to be used for each time-slice. @@ -243,14 +250,21 @@ def lda_seq_infer(self, corpus, topic_suffstats, gammas, lhoods, iter_, lda_infe model = "DTM" if model == "DTM": - bound, gammas = self.inferDTMseq(corpus, topic_suffstats, gammas, lhoods, lda, ldapost, iter_, bound, lda_inference_max_iter, chunksize) + bound, gammas = self.inferDTMseq( + corpus, topic_suffstats, gammas, lhoods, lda, + ldapost, iter_, bound, lda_inference_max_iter, chunksize + ) elif model == "DIM": self.InfluenceTotalFixed(corpus) - bound, gammas = self.inferDIMseq(corpus, topic_suffstats, gammas, lhoods, lda, ldapost, iter_, bound, lda_inference_max_iter, chunksize) + bound, gammas = self.inferDIMseq( + corpus, topic_suffstats, gammas, lhoods, lda, + ldapost, iter_, bound, lda_inference_max_iter, chunksize + ) return bound, gammas - def inferDTMseq(self, corpus, topic_suffstats, gammas, lhoods, lda, ldapost, iter_, bound, lda_inference_max_iter, chunksize): + def inferDTMseq(self, corpus, topic_suffstats, gammas, lhoods, lda, + ldapost, iter_, bound, lda_inference_max_iter, chunksize): """ Computes the likelihood of a sequential corpus under an LDA seq model, and return the likelihood bound. Need to pass the LdaSeq model, corpus, sufficient stats, gammas and lhoods matrices previously created, @@ -281,9 +295,13 @@ def inferDTMseq(self, corpus, topic_suffstats, gammas, lhoods, lda, ldapost, ite # TODO: replace fit_lda_post with appropriate ldamodel functions, if possible. if iter_ == 0: - doc_lhood = LdaPost.fit_lda_post(ldapost, doc_num, time, None, lda_inference_max_iter=lda_inference_max_iter) + doc_lhood = LdaPost.fit_lda_post( + ldapost, doc_num, time, None, lda_inference_max_iter=lda_inference_max_iter + ) else: - doc_lhood = LdaPost.fit_lda_post(ldapost, doc_num, time, self, lda_inference_max_iter=lda_inference_max_iter) + doc_lhood = LdaPost.fit_lda_post( + ldapost, doc_num, time, self, lda_inference_max_iter=lda_inference_max_iter + ) if topic_suffstats is not None: topic_suffstats = LdaPost.update_lda_seq_ss(ldapost, time, doc, topic_suffstats) @@ -310,7 +328,6 @@ def fit_lda_seq_topics(self, topic_suffstats): Fit lda sequence topic wise. 
""" lhood = 0 - lhood_term = 0 for k, chain in enumerate(self.topic_chains): logger.info("Fitting topic number %i", k) @@ -416,8 +433,6 @@ def __getitem__(self, doc): # should even the likelihoods be returned? return doc_topic -# endclass LdaSeqModel - class sslm(utils.SaveLoad): """ @@ -593,10 +608,8 @@ def fit_sslm(self, sstats): sslm_max_iter = 2 converged = sslm_fit_threshold + 1 - totals = np.zeros(sstats.shape[1]) - # computing variance, fwd_variance - self.variance, self.fwd_variance = map(np.array, list(zip(*[self.compute_post_variance(w, self.chain_variance) for w in range(0, W)]))) + self.variance, self.fwd_variance = (np.array(x) for x in list(zip(*[self.compute_post_variance(w, self.chain_variance) for w in range(0, W)]))) # column sum of sstats totals = sstats.sum(axis=0) @@ -631,8 +644,8 @@ def compute_bound(self, sstats, totals): Compute log probability bound. Forumula is as described in appendix of DTM by Blei. (formula no. 5) """ - W = self.vocab_len - T = self.num_time_slices + w = self.vocab_len + t = self.num_time_slices term_1 = 0 term_2 = 0 @@ -643,19 +656,19 @@ def compute_bound(self, sstats, totals): chain_variance = self.chain_variance # computing mean, fwd_mean - self.mean, self.fwd_mean = map(np.array, (zip(*[self.compute_post_mean(w, self.chain_variance) for w in range(0, W)]))) + self.mean, self.fwd_mean = (np.array(x) for x in zip(*[self.compute_post_mean(w, self.chain_variance) for w in range(0, w)])) self.zeta = self.update_zeta() - for w in range(0, W): - val += (self.variance[w][0] - self.variance[w][T]) / 2 * chain_variance + for w in range(0, w): + val += (self.variance[w][0] - self.variance[w][t]) / 2 * chain_variance logger.info("Computing bound, all times") - for t in range(1, T + 1): + for t in range(1, t + 1): term_1 = 0.0 term_2 = 0.0 ent = 0.0 - for w in range(0, W): + for w in range(0, w): m = self.mean[w][t] prev_m = self.mean[w][t - 1] @@ -725,7 +738,9 @@ def update_obs(self, sstats, totals): if model == "DTM": # slowest part of method - obs = optimize.fmin_cg(f=f_obs, fprime=df_obs, x0=obs, gtol=TOL, args=args, epsilon=STEP_SIZE, disp=0) + obs = optimize.fmin_cg( + f=f_obs, fprime=df_obs, x0=obs, gtol=TOL, args=args, epsilon=STEP_SIZE, disp=0 + ) if model == "DIM": pass runs += 1 @@ -803,7 +818,6 @@ def compute_obs_deriv(self, word, word_counts, totals, mean_deriv_mtx, deriv): for u in range(1, T + 1): mean_u = mean[u] - variance_u_prev = variance[u - 1] # noqa:F841 mean_u_prev = mean[u - 1] dmean_u = mean_deriv[u] dmean_u_prev = mean_deriv[u - 1] @@ -1026,7 +1040,6 @@ def update_lda_seq_ss(self, time, doc, topic_suffstats): topic_suffstats[k] = topic_ss return topic_suffstats -# endclass LdaPost # the following functions are used in update_obs as the function to optimize @@ -1060,7 +1073,6 @@ def f_obs(x, *args): for t in range(1, T + 1): mean_t = mean[t] mean_t_prev = mean[t - 1] - var_t_prev = variance[t - 1] # noqa:F841 val = mean_t - mean_t_prev term1 += val * val diff --git a/gensim/models/logentropy_model.py b/gensim/models/logentropy_model.py index d4bfc93479..05f79ae3c2 100644 --- a/gensim/models/logentropy_model.py +++ b/gensim/models/logentropy_model.py @@ -45,7 +45,7 @@ class LogEntropyModel(interfaces.TransformationABC): Model persistency is achieved via its load/save methods. """ - def __init__(self, corpus, id2word=None, normalize=True): + def __init__(self, corpus, normalize=True): """ `normalize` dictates whether the resulting vectors will be set to unit length. 
@@ -58,8 +58,7 @@ def __init__(self, corpus, id2word=None, normalize=True): self.initialize(corpus) def __str__(self): - return "LogEntropyModel(n_docs=%s, n_words=%s)" % (self.n_docs, - self.n_words) + return "LogEntropyModel(n_docs=%s, n_words=%s)" % (self.n_docs, self.n_words) def initialize(self, corpus): """ @@ -71,7 +70,7 @@ def initialize(self, corpus): glob_num_words, doc_no = 0, -1 for doc_no, bow in enumerate(corpus): if doc_no % 10000 == 0: - logger.info("PROGRESS: processing document #%i" % doc_no) + logger.info("PROGRESS: processing document #%i", doc_no) glob_num_words += len(bow) for term_id, term_count in bow: glob_freq[term_id] = glob_freq.get(term_id, 0) + term_count @@ -81,14 +80,14 @@ def initialize(self, corpus): self.n_words = glob_num_words # and finally compute the global weights - logger.info("calculating global log entropy weights for %i " - "documents and %i features (%i matrix non-zeros)" - % (self.n_docs, len(glob_freq), self.n_words)) + logger.info( + "calculating global log entropy weights for %i documents and %i features (%i matrix non-zeros)", + self.n_docs, len(glob_freq), self.n_words + ) logger.debug('iterating over corpus') for doc_no2, bow in enumerate(corpus): for key, freq in bow: - p = (float(freq) / glob_freq[key]) * math.log(float(freq) / - glob_freq[key]) + p = (float(freq) / glob_freq[key]) * math.log(float(freq) / glob_freq[key]) self.entr[key] = self.entr.get(key, 0.0) + p if doc_no2 != doc_no: raise ValueError("LogEntropyModel doesn't support generators as training data") @@ -107,8 +106,11 @@ def __getitem__(self, bow): return self._apply(bow) # unknown (new) terms will be given zero weight (NOT infinity/huge) - vector = [(term_id, math.log(tf + 1) * self.entr.get(term_id)) - for term_id, tf in bow if term_id in self.entr] + vector = [ + (term_id, math.log(tf + 1) * self.entr.get(term_id)) + for term_id, tf in bow + if term_id in self.entr + ] if self.normalize: vector = matutils.unitvec(vector) return vector diff --git a/gensim/models/lsi_dispatcher.py b/gensim/models/lsi_dispatcher.py index 5a69327522..dd18734dfb 100755 --- a/gensim/models/lsi_dispatcher.py +++ b/gensim/models/lsi_dispatcher.py @@ -80,7 +80,7 @@ def initialize(self, **model_params): worker = Pyro4.Proxy(uri) workerid = len(self.workers) # make time consuming methods work asynchronously - logger.info("registering worker #%i from %s" % (workerid, uri)) + logger.info("registering worker #%i from %s", workerid, uri) worker.initialize(workerid, dispatcher=self.callback, **model_params) self.workers[workerid] = worker except Pyro4.errors.PyroError: @@ -99,16 +99,16 @@ def getworkers(self): @Pyro4.expose def getjob(self, worker_id): - logger.info("worker #%i requesting a new job" % worker_id) + logger.info("worker #%i requesting a new job", worker_id) job = self.jobs.get(block=True, timeout=1) - logger.info("worker #%i got a new job (%i left)" % (worker_id, self.jobs.qsize())) + logger.info("worker #%i got a new job (%i left)", worker_id, self.jobs.qsize()) return job @Pyro4.expose def putjob(self, job): self._jobsreceived += 1 self.jobs.put(job, block=True, timeout=HUGE_TIMEOUT) - logger.info("added a new job (len(queue)=%i items)" % self.jobs.qsize()) + logger.info("added a new job (len(queue)=%i items)", self.jobs.qsize()) @Pyro4.expose def getstate(self): @@ -116,7 +116,7 @@ def getstate(self): Merge projections from across all workers and return the final projection. 
""" logger.info("end of input, assigning all remaining jobs") - logger.debug("jobs done: %s, jobs received: %s" % (self._jobsdone, self._jobsreceived)) + logger.debug("jobs done: %s, jobs received: %s", self._jobsdone, self._jobsreceived) while self._jobsdone < self._jobsreceived: time.sleep(0.5) # check every half a second @@ -124,11 +124,11 @@ def getstate(self): # and not `workers - 1` merges! # but merging only takes place once, after all input data has been processed, # so the overall effect would be small... compared to the amount of coding :-) - logger.info("merging states from %i workers" % len(self.workers)) + logger.info("merging states from %i workers", self.workers) workers = list(self.workers.items()) result = workers[0][1].getstate() for workerid, worker in workers[1:]: - logger.info("pulling state from worker %s" % workerid) + logger.info("pulling state from worker %s", workerid) result.merge(worker.getstate()) logger.info("sending out merged projection") return result @@ -139,7 +139,7 @@ def reset(self): Initialize all workers for a new decomposition. """ for workerid, worker in iteritems(self.workers): - logger.info("resetting worker %s" % workerid) + logger.info("resetting worker %s", workerid) worker.reset() worker.requestjob() self._jobsdone = 0 @@ -157,7 +157,7 @@ def jobdone(self, workerid): worker.requestjob(). """ self._jobsdone += 1 - logger.info("worker #%s finished job #%i" % (workerid, self._jobsdone)) + logger.info("worker #%s finished job #%i", workerid, self._jobsdone) worker = self.workers[workerid] worker.requestjob() # tell the worker to ask for another job, asynchronously (one-way) @@ -171,7 +171,7 @@ def exit(self): Terminate all registered workers and then the dispatcher. """ for workerid, worker in iteritems(self.workers): - logger.info("terminating worker %s" % workerid) + logger.info("terminating worker %s", workerid) worker.exit() logger.info("terminating dispatcher") os._exit(0) # exit the whole process (not just this thread ala sys.exit()) @@ -180,7 +180,7 @@ def exit(self): def main(): logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - logger.info("running %s" % " ".join(sys.argv)) + logger.info("running %s", " ".join(sys.argv)) program = os.path.basename(sys.argv[0]) # make sure we have enough cmd line parameters @@ -194,7 +194,7 @@ def main(): maxsize = int(sys.argv[1]) utils.pyro_daemon('gensim.lsi_dispatcher', Dispatcher(maxsize=maxsize)) - logger.info("finished running %s" % program) + logger.info("finished running %s", program) if __name__ == '__main__': diff --git a/gensim/models/lsi_worker.py b/gensim/models/lsi_worker.py index 4cae372ffd..ffb31eafb9 100755 --- a/gensim/models/lsi_worker.py +++ b/gensim/models/lsi_worker.py @@ -47,7 +47,7 @@ def initialize(self, myid, dispatcher, **model_params): self.myid = myid # id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? 
self.dispatcher = dispatcher self.finished = False - logger.info("initializing worker #%s" % myid) + logger.info("initializing worker #%s", myid) self.model = lsimodel.LsiModel(**model_params) @Pyro4.expose @@ -67,11 +67,11 @@ def requestjob(self): # no new job: try again, unless we're finished with all work continue if job is not None: - logger.info("worker #%s received job #%i" % (self.myid, self.jobsdone)) + logger.info("worker #%s received job #%i", self.myid, self.jobsdone) self.processjob(job) self.dispatcher.jobdone(self.myid) else: - logger.info("worker #%i stopping asking for jobs" % self.myid) + logger.info("worker #%i stopping asking for jobs", self.myid) @utils.synchronous('lock_update') def processjob(self, job): @@ -84,8 +84,7 @@ def processjob(self, job): @Pyro4.expose @utils.synchronous('lock_update') def getstate(self): - logger.info("worker #%i returning its state after %s jobs" % - (self.myid, self.jobsdone)) + logger.info("worker #%i returning its state after %s jobs", self.myid, self.jobsdone) assert isinstance(self.model.projection, lsimodel.Projection) self.finished = True return self.model.projection @@ -93,20 +92,20 @@ def getstate(self): @Pyro4.expose @utils.synchronous('lock_update') def reset(self): - logger.info("resetting worker #%i" % self.myid) + logger.info("resetting worker #%i", self.myid) self.model.projection = self.model.projection.empty_like() self.finished = False @Pyro4.oneway def exit(self): - logger.info("terminating worker #%i" % self.myid) + logger.info("terminating worker #%i", self.myid) os._exit(0) # endclass Worker def main(): logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - logger.info("running %s" % " ".join(sys.argv)) + logger.info("running %s", " ".join(sys.argv)) program = os.path.basename(sys.argv[0]) # make sure we have enough cmd line parameters @@ -116,7 +115,7 @@ def main(): utils.pyro_daemon('gensim.lsi_worker', Worker(), random_suffix=True) - logger.info("finished running %s" % program) + logger.info("finished running %s", program) if __name__ == '__main__': diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index f9e16cd23b..e55a009ab8 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -161,8 +161,9 @@ def merge(self, other, decay=1.0): self.s = other.s.copy() return if self.m != other.m: - raise ValueError("vector space mismatch: update is using %s features, expected %s" % - (other.m, self.m)) + raise ValueError( + "vector space mismatch: update is using %s features, expected %s" % (other.m, self.m) + ) logger.info("merging projections: %s + %s", str(self.u.shape), str(other.u.shape)) m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1] # TODO Maybe keep the bases as elementary reflectors, without @@ -182,8 +183,10 @@ def merge(self, other, decay=1.0): assert not other.u # find the rotation that diagonalizes r - k = np.bmat([[np.diag(decay * self.s), np.multiply(c, other.s)], - [matutils.pad(np.array([]).reshape(0, 0), min(m, n2), n1), np.multiply(r, other.s)]]) + k = np.bmat([ + [np.diag(decay * self.s), np.multiply(c, other.s)], + [matutils.pad(np.array([]).reshape(0, 0), min(m, n2), n1), np.multiply(r, other.s)] + ]) logger.debug("computing SVD of %s dense matrix", k.shape) try: # in np < 1.1.0, running SVD sometimes results in "LinAlgError: SVD did not converge'. 
@@ -216,9 +219,6 @@ def merge(self, other, decay=1.0): for i in xrange(self.u.shape[1]): if self.u[0, i] < 0.0: self.u[:, i] *= -1.0 -# diff = np.dot(self.u.T, self.u) - np.eye(self.u.shape[1]) -# logger.info('orth error after=%f' % np.sum(diff * diff)) -# endclass Projection class LsiModel(interfaces.TransformationABC, basemodel.BaseTopicModel): @@ -300,7 +300,9 @@ def __init__(self, corpus=None, num_topics=200, id2word=None, chunksize=20000, self.num_terms = 1 + (max(self.id2word.keys()) if self.id2word else -1) self.docs_processed = 0 - self.projection = Projection(self.num_terms, self.num_topics, power_iters=self.power_iters, extra_dims=self.extra_samples) + self.projection = Projection( + self.num_terms, self.num_topics, power_iters=self.power_iters, extra_dims=self.extra_samples + ) self.numworkers = 1 if not distributed: @@ -308,16 +310,18 @@ def __init__(self, corpus=None, num_topics=200, id2word=None, chunksize=20000, self.dispatcher = None else: if not onepass: - raise NotImplementedError("distributed stochastic LSA not implemented yet; " - "run either distributed one-pass, or serial randomized.") + raise NotImplementedError( + "distributed stochastic LSA not implemented yet; " + "run either distributed one-pass, or serial randomized." + ) try: import Pyro4 dispatcher = Pyro4.Proxy('PYRONAME:gensim.lsi_dispatcher') logger.debug("looking for dispatcher at %s", str(dispatcher._pyroUri)) - dispatcher.initialize(id2word=self.id2word, num_topics=num_topics, - chunksize=chunksize, decay=decay, - power_iters=self.power_iters, extra_samples=self.extra_samples, - distributed=False, onepass=onepass) + dispatcher.initialize( + id2word=self.id2word, num_topics=num_topics, chunksize=chunksize, decay=decay, + power_iters=self.power_iters, extra_samples=self.extra_samples, distributed=False, onepass=onepass + ) self.dispatcher = dispatcher self.numworkers = len(dispatcher.getworkers()) logger.info("using distributed version with %i workers", self.numworkers) @@ -359,7 +363,8 @@ def add_documents(self, corpus, chunksize=None, decay=None): update.u, update.s = stochastic_svd( corpus, self.num_topics, num_terms=self.num_terms, chunksize=chunksize, - extra_dims=self.extra_samples, power_iters=self.power_iters) + extra_dims=self.extra_samples, power_iters=self.power_iters + ) self.projection.merge(update, decay=decay) self.docs_processed += len(corpus) if hasattr(corpus, '__len__') else 0 else: @@ -385,7 +390,10 @@ def add_documents(self, corpus, chunksize=None, decay=None): logger.info("dispatched documents up to #%s", doc_no) else: # serial version, there is only one "worker" (myself) => process the job directly - update = Projection(self.num_terms, self.num_topics, job, extra_dims=self.extra_samples, power_iters=self.power_iters) + update = Projection( + self.num_terms, self.num_topics, job, extra_dims=self.extra_samples, + power_iters=self.power_iters + ) del job self.projection.merge(update, decay=decay) del update @@ -397,19 +405,21 @@ def add_documents(self, corpus, chunksize=None, decay=None): logger.info("reached the end of input; now waiting for all remaining jobs to finish") self.projection = self.dispatcher.getstate() self.docs_processed += doc_no -# logger.info("top topics after adding %i documents" % doc_no) -# self.print_debug(10) else: assert not self.dispatcher, "must be in serial mode to receive jobs" assert self.onepass, "distributed two-pass algo not supported yet" - update = Projection(self.num_terms, self.num_topics, corpus.tocsc(), extra_dims=self.extra_samples, 
power_iters=self.power_iters) + update = Projection( + self.num_terms, self.num_topics, corpus.tocsc(), extra_dims=self.extra_samples, + power_iters=self.power_iters + ) self.projection.merge(update, decay=decay) logger.info("processed sparse job of %i documents", corpus.shape[1]) self.docs_processed += corpus.shape[1] def __str__(self): return "LsiModel(num_terms=%s, num_topics=%s, decay=%s, chunksize=%s)" % ( - self.num_terms, self.num_topics, self.decay, self.chunksize) + self.num_terms, self.num_topics, self.decay, self.chunksize + ) def __getitem__(self, bow, scaled=False, chunksize=512): """ @@ -579,9 +589,8 @@ def load(cls, fname, *args, **kwargs): try: result.projection = super(LsiModel, cls).load(projection_fname, *args, **kwargs) except Exception as e: - logging.warning("failed to load projection from %s: %s" % (projection_fname, e)) + logging.warning("failed to load projection from %s: %s", projection_fname, e) return result -# endclass LsiModel def print_debug(id2token, u, s, topics, num_words=10, num_neg=None): @@ -679,7 +688,7 @@ def stochastic_svd(corpus, rank, num_terms, chunksize=20000, extra_dims=None, q, _ = matutils.qr_destroy(y) # orthonormalize the range logger.debug("running %i power iterations", power_iters) - for power_iter in xrange(power_iters): + for _ in xrange(power_iters): q = corpus.T * q q = [corpus * q] q, _ = matutils.qr_destroy(q) # orthonormalize the range after each power iteration step @@ -697,8 +706,10 @@ def stochastic_svd(corpus, rank, num_terms, chunksize=20000, extra_dims=None, num_docs += n logger.debug("multiplying chunk * gauss") o = np.random.normal(0.0, 1.0, (n, samples)).astype(dtype) # draw a random gaussian matrix - sparsetools.csc_matvecs(m, n, samples, chunk.indptr, chunk.indices, # y = y + chunk * o - chunk.data, o.ravel(), y.ravel()) + sparsetools.csc_matvecs( + m, n, samples, chunk.indptr, chunk.indices, # y = y + chunk * o + chunk.data, o.ravel(), y.ravel() + ) del chunk, o y = [y] q, _ = matutils.qr_destroy(y) # orthonormalize the range @@ -723,7 +734,7 @@ def stochastic_svd(corpus, rank, num_terms, chunksize=20000, extra_dims=None, if scipy.sparse.issparse(corpus): b = qt * corpus - logger.info("2nd phase: running dense svd on %s matrix" % str(b.shape)) + logger.info("2nd phase: running dense svd on %s matrix", str(b.shape)) u, s, vt = scipy.linalg.svd(b, full_matrices=False) del b, vt else: diff --git a/gensim/models/normmodel.py b/gensim/models/normmodel.py index a78dc604dc..31e843beb3 100644 --- a/gensim/models/normmodel.py +++ b/gensim/models/normmodel.py @@ -55,7 +55,7 @@ def calc_norm(self, corpus): """ Calculates the norm by calling matutils.unitvec with the norm parameter. """ - logger.info("Performing %s normalization..." 
% (self.norm)) + logger.info("Performing %s normalization...", self.norm) norms = [] numnnz = 0 docno = 0 @@ -73,4 +73,3 @@ def normalize(self, bow): def __getitem__(self, bow): return self.normalize(bow) -# endclass NormModel diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py index 1f0826258c..263968526f 100644 --- a/gensim/models/phrases.py +++ b/gensim/models/phrases.py @@ -108,9 +108,8 @@ class Phrases(interfaces.TransformationABC): """ - def __init__(self, sentences=None, min_count=5, threshold=10.0, - max_vocab_size=40000000, delimiter=b'_', progress_per=10000, - scoring='default'): + def __init__(self, sentences=None, min_count=5, threshold=10.0, max_vocab_size=40000000, + delimiter=b'_', progress_per=10000, scoring='default'): """ Initialize the model from an iterable of `sentences`. Each sentence must be a list of words (unicode strings) that will be used for training. @@ -179,7 +178,8 @@ def __str__(self): """Get short string representation of this phrase detector.""" return "%s<%i vocab, min_count=%s, threshold=%s, max_vocab_size=%s>" % ( self.__class__.__name__, len(self.vocab), self.min_count, - self.threshold, self.max_vocab_size) + self.threshold, self.max_vocab_size + ) @staticmethod def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000): @@ -191,8 +191,10 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000): min_reduce = 1 for sentence_no, sentence in enumerate(sentences): if sentence_no % progress_per == 0: - logger.info("PROGRESS: at sentence #%i, processed %i words and %i word types" % - (sentence_no, total_words, len(vocab))) + logger.info( + "PROGRESS: at sentence #%i, processed %i words and %i word types", + sentence_no, total_words, len(vocab) + ) sentence = [utils.any2utf8(w) for w in sentence] for bigram in zip(sentence, sentence[1:]): vocab[bigram[0]] += 1 @@ -208,8 +210,10 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000): utils.prune_vocab(vocab, min_reduce) min_reduce += 1 - logger.info("collected %i word types from a corpus of %i words (unigram + bigrams) and %i sentences" % - (len(vocab), total_words, sentence_no + 1)) + logger.info( + "collected %i word types from a corpus of %i words (unigram + bigrams) and %i sentences", + len(vocab), total_words, sentence_no + 1 + ) return min_reduce, vocab, total_words def add_vocab(self, sentences): @@ -262,11 +266,9 @@ def export_phrases(self, sentences, out_delimiter=b' ', as_tuples=False): corpus_word_count = self.corpus_word_count if scoring == 'default': - scoring_function = \ - partial(self.original_scorer, len_vocab=float(len(vocab)), min_count=float(min_count)) + scoring_function = partial(self.original_scorer, len_vocab=float(len(vocab)), min_count=float(min_count)) elif scoring == 'npmi': - scoring_function = \ - partial(self.npmi_scorer, corpus_word_count=corpus_word_count) + scoring_function = partial(self.npmi_scorer, corpus_word_count=corpus_word_count) # no else here to catch unknown scoring function, check is done in Phrases.__init__ for sentence in sentences: @@ -282,10 +284,6 @@ def export_phrases(self, sentences, out_delimiter=b' ', as_tuples=False): count_b = float(vocab[word_b]) count_ab = float(vocab[bigram_word]) score = scoring_function(count_a, count_b, count_ab) - # logger.debug("score for %s: (pab=%s - min_count=%s) / pa=%s / pb=%s * vocab_size=%s = %s", - # bigram_word, pab, self.min_count, pa, pb, len(self.vocab), score) - # added mincount check because if the scorer doesn't contain min_count 
- # it would not be enforced otherwise if score > threshold and count_ab >= min_count: if as_tuples: yield ((word_a, word_b), score) @@ -336,8 +334,6 @@ def __getitem__(self, sentence): pb = float(vocab[word_b]) pab = float(vocab[bigram_word]) score = (pab - min_count) / pa / pb * len(vocab) - # logger.debug("score for %s: (pab=%s - min_count=%s) / pa=%s / pb=%s * vocab_size=%s = %s", - # bigram_word, pab, self.min_count, pa, pb, len(self.vocab), score) if score > threshold: new_s.append(bigram_word) last_bigram = True @@ -453,7 +449,7 @@ def __getitem__(self, sentence): if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', level=logging.INFO) - logging.info("running %s" % " ".join(sys.argv)) + logging.info("running %s", " ".join(sys.argv)) # check and process cmdline input program = os.path.basename(sys.argv[0]) diff --git a/gensim/models/rpmodel.py b/gensim/models/rpmodel.py index 1186a041a0..f5753c75c5 100644 --- a/gensim/models/rpmodel.py +++ b/gensim/models/rpmodel.py @@ -61,7 +61,7 @@ def initialize(self, corpus): self.num_terms = 1 + max([-1] + self.id2word.keys()) shape = self.num_topics, self.num_terms - logger.info("constructing %s random matrix" % str(shape)) + logger.info("constructing %s random matrix", str(shape)) # Now construct the projection matrix itself. # Here i use a particular form, derived in "Achlioptas: Database-friendly random projection", # and his (1) scenario of Theorem 1.1 in particular (all entries are +1/-1). @@ -89,10 +89,11 @@ def __getitem__(self, bow): vec = matutils.sparse2full(bow, self.num_terms).reshape(self.num_terms, 1) / np.sqrt(self.num_topics) vec = np.asfortranarray(vec, dtype=np.float32) topic_dist = np.dot(self.projection, vec) # (k, d) * (d, 1) = (k, 1) - return [(topicid, float(topicvalue)) for topicid, topicvalue in enumerate(topic_dist.flat) - if np.isfinite(topicvalue) and not np.allclose(topicvalue, 0.0)] + return [ + (topicid, float(topicvalue)) for topicid, topicvalue in enumerate(topic_dist.flat) + if np.isfinite(topicvalue) and not np.allclose(topicvalue, 0.0) + ] def __setstate__(self, state): self.__dict__ = state self.freshly_loaded = True -# endclass RpModel diff --git a/gensim/models/tfidfmodel.py b/gensim/models/tfidfmodel.py index 4b5ba02e02..50320ad747 100644 --- a/gensim/models/tfidfmodel.py +++ b/gensim/models/tfidfmodel.py @@ -28,7 +28,7 @@ def precompute_idfs(wglobal, dfs, total_docs): """Precompute the inverse document frequency mapping for all terms.""" # not strictly necessary and could be computed on the fly in TfidfModel__getitem__. # this method is here just to speed things up a little. - return dict((termid, wglobal(df, total_docs)) for termid, df in iteritems(dfs)) + return {termid: wglobal(df, total_docs) for termid, df in iteritems(dfs)} class TfidfModel(interfaces.TransformationABC): @@ -49,9 +49,8 @@ class TfidfModel(interfaces.TransformationABC): Model persistency is achieved via its load/save methods. """ - def __init__( - self, corpus=None, id2word=None, dictionary=None, - wlocal=utils.identity, wglobal=df2idf, normalize=True): + def __init__(self, corpus=None, id2word=None, dictionary=None, + wlocal=utils.identity, wglobal=df2idf, normalize=True): """ Compute tf-idf by multiplying a local component (term frequency) with a global component (inverse document frequency), and normalizing @@ -89,7 +88,8 @@ def __init__( # step that goes through the corpus (= an optimization). 
if corpus is not None: logger.warning( - "constructor received both corpus and explicit inverse document frequencies; ignoring the corpus") + "constructor received both corpus and explicit inverse document frequencies; ignoring the corpus" + ) self.num_docs, self.num_nnz = dictionary.num_docs, dictionary.num_nnz self.dfs = dictionary.dfs.copy() self.idfs = precompute_idfs(self.wglobal, self.dfs, self.num_docs) @@ -129,7 +129,8 @@ def initialize(self, corpus): n_features = max(dfs) if dfs else 0 logger.info( "calculating IDF weights for %i documents and %i features (%i matrix non-zeros)", - self.num_docs, n_features, self.num_nnz) + self.num_docs, n_features, self.num_nnz + ) self.idfs = precompute_idfs(self.wglobal, self.dfs, self.num_docs) def __getitem__(self, bow, eps=1e-12): @@ -158,4 +159,3 @@ def __getitem__(self, bow, eps=1e-12): # make sure there are no explicit zeroes in the vector (must be sparse) vector = [(termid, weight) for termid, weight in vector if abs(weight) > eps] return vector -# endclass TfidfModel diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 255b9c553f..2111478c00 100644 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -159,7 +159,9 @@ def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False): for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): # don't train on the `word` itself if pos2 != pos: - train_sg_pair(model, model.wv.index2word[word.index], word2.index, alpha, compute_loss=compute_loss) + train_sg_pair( + model, model.wv.index2word[word.index], word2.index, alpha, compute_loss=compute_loss + ) result += len(word_vocabs) return result @@ -382,11 +384,10 @@ class Word2Vec(utils.SaveLoad): """ - def __init__( - self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, - trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False): + def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, + max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, + sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, + trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False): """ Initialize the model from an iterable of `sentences`. Each sentence is a list of words (unicode strings) that will be used for training. @@ -460,9 +461,9 @@ def __init__( self.load = call_on_class_only if FAST_VERSION == -1: - logger.warning('Slow version of {0} is being used'.format(__name__)) + logger.warning('Slow version of %s is being used', __name__) else: - logger.debug('Fast version of {0} is being used'.format(__name__)) + logger.debug('Fast version of %s is being used', __name__) self.initialize_word_vectors() self.sg = int(sg) @@ -498,12 +499,14 @@ def __init__( if isinstance(sentences, GeneratorType): raise TypeError("You can't pass a generator as the sentences argument. 
Try an iterator.") self.build_vocab(sentences, trim_rule=trim_rule) - self.train(sentences, total_examples=self.corpus_count, epochs=self.iter, - start_alpha=self.alpha, end_alpha=self.min_alpha) + self.train(sentences, total_examples=self.corpus_count, epochs=self.iter, start_alpha=self.alpha, end_alpha=self.min_alpha) else: if trim_rule is not None: - logger.warning("The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part of the model. ") - logger.warning("Model initialized without sentences. trim_rule provided, if any, will be ignored.") + logger.warning( + "The rule, if given, is only used to prune vocabulary during build_vocab() " + "and is not stored as part of the model. Model initialized without sentences. " + "trim_rule provided, if any, will be ignored." + ) def initialize_word_vectors(self): self.wv = KeyedVectors() @@ -546,7 +549,9 @@ def create_binary_tree(self): heapq.heapify(heap) for i in xrange(len(self.wv.vocab) - 1): min1, min2 = heapq.heappop(heap), heapq.heappop(heap) - heapq.heappush(heap, Vocab(count=min1.count + min2.count, index=i + len(self.wv.vocab), left=min1, right=min2)) + heapq.heappush( + heap, Vocab(count=min1.count + min2.count, index=i + len(self.wv.vocab), left=min1, right=min2) + ) # recurse over the tree, assigning a binary code to each vocabulary word if heap: @@ -587,13 +592,16 @@ def scan_vocab(self, sentences, progress_per=10000, trim_rule=None): if not checked_string_types: if isinstance(sentence, string_types): logger.warning( - "Each 'sentences' item should be a list of words (usually unicode strings)." - "First item here is instead plain %s.", type(sentence) + "Each 'sentences' item should be a list of words (usually unicode strings). " + "First item here is instead plain %s.", + type(sentence) ) checked_string_types += 1 if sentence_no % progress_per == 0: - logger.info("PROGRESS: at sentence #%i, processed %i words, keeping %i word types", - sentence_no, sum(itervalues(vocab)) + total_words, len(vocab)) + logger.info( + "PROGRESS: at sentence #%i, processed %i words, keeping %i word types", + sentence_no, sum(itervalues(vocab)) + total_words, len(vocab) + ) for word in sentence: vocab[word] += 1 @@ -602,12 +610,15 @@ def scan_vocab(self, sentences, progress_per=10000, trim_rule=None): min_reduce += 1 total_words += sum(itervalues(vocab)) - logger.info("collected %i word types from a corpus of %i raw words and %i sentences", - len(vocab), total_words, sentence_no + 1) + logger.info( + "collected %i word types from a corpus of %i raw words and %i sentences", + len(vocab), total_words, sentence_no + 1 + ) self.corpus_count = sentence_no + 1 self.raw_vocab = vocab - def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab=False, trim_rule=None, update=False): + def scale_vocab(self, min_count=None, sample=None, dry_run=False, + keep_raw_vocab=False, trim_rule=None, update=False): """ Apply vocabulary settings for `min_count` (discarding less-frequent words) and `sample` (controlling the downsampling of more-frequent words). 
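A minimal usage sketch of the `scale_vocab()` settings documented above (the toy `sentences` corpus and parameter values are hypothetical, and this follows the pre-refactor API shown in this patch; `dry_run=True` only returns the report dict built in the next hunk and does not modify the model):

    from gensim.models import Word2Vec

    sentences = [["human", "interface", "computer"], ["survey", "user", "computer", "system"]]
    model = Word2Vec(size=50, min_count=1)   # no corpus yet: only hyperparameters are set
    model.scan_vocab(sentences)              # collect raw term frequencies into model.raw_vocab
    report = model.scale_vocab(min_count=1, sample=1e-3, dry_run=True)  # preview only
    print(report['drop_unique'], report['retain_total'], report['downsample_total'], report['memory'])
    model.scale_vocab(min_count=1, sample=1e-3)  # apply the same settings for real
    model.finalize_vocab()
    model.train(sentences, total_examples=model.corpus_count, epochs=model.iter)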
@@ -648,12 +659,16 @@ def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab drop_total += v original_unique_total = len(retain_words) + drop_unique retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1) - logger.info("min_count=%d retains %i unique words (%i%% of original %i, drops %i)", - min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique) + logger.info( + "min_count=%d retains %i unique words (%i%% of original %i, drops %i)", + min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique + ) original_total = retain_total + drop_total retain_pct = retain_total * 100 / max(original_total, 1) - logger.info("min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)", - min_count, retain_total, retain_pct, original_total, drop_total) + logger.info( + "min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)", + min_count, retain_total, retain_pct, original_total, drop_total + ) else: logger.info("Updating model with new vocabulary") new_total = pre_exist_total = 0 @@ -677,10 +692,12 @@ def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab original_unique_total = len(pre_exist_words) + len(new_words) + drop_unique pre_exist_unique_pct = len(pre_exist_words) * 100 / max(original_unique_total, 1) new_unique_pct = len(new_words) * 100 / max(original_unique_total, 1) - logger.info("""New added %i unique words (%i%% of original %i) - and increased the count of %i pre-existing words (%i%% of original %i)""", - len(new_words), new_unique_pct, original_unique_total, - len(pre_exist_words), pre_exist_unique_pct, original_unique_total) + logger.info( + "New added %i unique words (%i%% of original %i) " + "and increased the count of %i pre-existing words (%i%% of original %i)", + len(new_words), new_unique_pct, original_unique_total, len(pre_exist_words), + pre_exist_unique_pct, original_unique_total + ) retain_words = new_words + pre_exist_words retain_total = new_total + pre_exist_total @@ -713,15 +730,16 @@ def scale_vocab(self, min_count=None, sample=None, dry_run=False, keep_raw_vocab self.raw_vocab = defaultdict(int) logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique) - logger.info("downsampling leaves estimated %i word corpus (%.1f%% of prior %i)", - downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total) - - # return from each step: words-affected, resulting-corpus-size - report_values = {'drop_unique': drop_unique, 'retain_total': retain_total, - 'downsample_unique': downsample_unique, 'downsample_total': int(downsample_total)} + logger.info( + "downsampling leaves estimated %i word corpus (%.1f%% of prior %i)", + downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total + ) - # print extra memory estimates - report_values['memory'] = self.estimate_memory(vocab_size=len(retain_words)) + # return from each step: words-affected, resulting-corpus-size, extra memory estimates + report_values = { + 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique, + 'downsample_total': int(downsample_total), 'memory': self.estimate_memory(vocab_size=len(retain_words)) + } return report_values @@ -787,8 +805,7 @@ def _raw_word_count(self, job): return sum(len(sentence) for sentence in job) def train(self, sentences, total_examples=None, total_words=None, - epochs=None, start_alpha=None, end_alpha=None, - word_count=0, + epochs=None, 
start_alpha=None, end_alpha=None, word_count=0, queue_factor=2, report_delay=1.0, compute_loss=None): """ Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). @@ -804,11 +821,13 @@ def train(self, sentences, total_examples=None, total_words=None, explicit `epochs` argument MUST be provided. In the common and recommended case, where `train()` is only called once, the model's cached `iter` value should be supplied as `epochs` value. """ - if (self.model_trimmed_post_training): + if self.model_trimmed_post_training: raise RuntimeError("Parameters for training were discarded using model_trimmed_post_training method") if FAST_VERSION < 0: - warnings.warn("C extension not loaded for Word2Vec, training will be slow. " - "Install a C compiler and reinstall gensim for fast training.") + warnings.warn( + "C extension not loaded for Word2Vec, training will be slow. " + "Install a C compiler and reinstall gensim for fast training." + ) self.neg_labels = [] if self.negative > 0: # precompute negative labels optimization for pure-python training @@ -822,8 +841,8 @@ def train(self, sentences, total_examples=None, total_words=None, logger.info( "training model with %i workers on %i vocabulary and %i features, " "using sg=%s hs=%s sample=%s negative=%s window=%s", - self.workers, len(self.wv.vocab), self.layer1_size, self.sg, - self.hs, self.sample, self.negative, self.window) + self.workers, len(self.wv.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative, self.window + ) if not self.wv.vocab: raise RuntimeError("you must first build vocabulary before training the model") @@ -834,10 +853,15 @@ def train(self, sentences, total_examples=None, total_words=None, raise ValueError( "The number of sentences in the training corpus is missing. Did you load the model via KeyedVectors.load_word2vec_format?" "Models loaded via load_word2vec_format don't support further training. " - "Instead start with a blank model, scan_vocab on the new corpus, intersect_word2vec_format with the old model, then train.") + "Instead start with a blank model, scan_vocab on the new corpus, " + "intersect_word2vec_format with the old model, then train." + ) if total_words is None and total_examples is None: - raise ValueError("You must specify either total_examples or total_words, for proper alpha and progress calculations. The usual value is total_examples=model.corpus_count.") + raise ValueError( + "You must specify either total_examples or total_words, for proper alpha and progress calculations. " + "The usual value is total_examples=model.corpus_count." + ) if epochs is None: raise ValueError("You must specify an explict epochs count. 
The usual value is epochs=model.iter.") start_alpha = start_alpha or self.alpha @@ -872,9 +896,7 @@ def job_producer(): pushed_words, pushed_examples = 0, 0 next_alpha = start_alpha if next_alpha > self.min_alpha_yet_reached: - logger.warning( - "Effective 'alpha' higher than previous training cycles" - ) + logger.warning("Effective 'alpha' higher than previous training cycles") self.min_alpha_yet_reached = next_alpha job_no = 0 @@ -890,7 +912,8 @@ def job_producer(): # no => submit the existing job logger.debug( "queueing job #%i (%i words, %i sentences) at alpha %.05f", - job_no, batch_size, len(job_batch), next_alpha) + job_no, batch_size, len(job_batch), next_alpha + ) job_no += 1 job_queue.put((job_batch, next_alpha)) @@ -914,15 +937,15 @@ def job_producer(): if job_batch: logger.debug( "queueing job #%i (%i words, %i sentences) at alpha %.05f", - job_no, batch_size, len(job_batch), next_alpha) + job_no, batch_size, len(job_batch), next_alpha + ) job_no += 1 job_queue.put((job_batch, next_alpha)) if job_no == 0 and self.train_count == 0: logger.warning( "train() called with an empty iterator (if not intended, " - "be sure to provide a corpus that offers restartable " - "iteration = an iterable)." + "be sure to provide a corpus that offers restartable iteration = an iterable)." ) # give the workers heads up that they can finish -- no more work! @@ -967,34 +990,31 @@ def job_producer(): logger.info( "PROGRESS: at %.2f%% examples, %.0f words/s, in_qsize %i, out_qsize %i", 100.0 * example_count / total_examples, trained_word_count / elapsed, - utils.qsize(job_queue), utils.qsize(progress_queue)) + utils.qsize(job_queue), utils.qsize(progress_queue) + ) else: # words-based progress % logger.info( "PROGRESS: at %.2f%% words, %.0f words/s, in_qsize %i, out_qsize %i", 100.0 * raw_word_count / total_words, trained_word_count / elapsed, - utils.qsize(job_queue), utils.qsize(progress_queue)) + utils.qsize(job_queue), utils.qsize(progress_queue) + ) next_report = elapsed + report_delay # all done; report the final stats elapsed = default_timer() - start logger.info( "training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s", - raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed) + raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed + ) if job_tally < 10 * self.workers: - logger.warning( - "under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay" - ) + logger.warning("under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay") # check that the input corpus hasn't changed during iteration if total_examples and total_examples != example_count: - logger.warning( - "supplied example count (%i) did not equal expected count (%i)", example_count, total_examples - ) + logger.warning("supplied example count (%i) did not equal expected count (%i)", example_count, total_examples) if total_words and total_words != raw_word_count: - logger.warning( - "supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words - ) + logger.warning("supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words) self.train_count += 1 # number of times train() has been called self.total_train_time += elapsed @@ -1021,21 +1041,25 @@ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor """ if FAST_VERSION < 0: - warnings.warn("C extension compilation failed, scoring will be slow. 
" - "Install a C compiler and reinstall gensim for fastness.") + warnings.warn( + "C extension compilation failed, scoring will be slow. " + "Install a C compiler and reinstall gensim for fastness." + ) logger.info( "scoring sentences with %i workers on %i vocabulary and %i features, " "using sg=%s hs=%s sample=%s and negative=%s", - self.workers, len(self.wv.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative) + self.workers, len(self.wv.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative + ) if not self.wv.vocab: raise RuntimeError("you must first build vocabulary before scoring new data") if not self.hs: - raise RuntimeError("We have currently only implemented score \ - for the hierarchical softmax scheme, so you need to have \ - run word2vec with hs=1 and negative=0 for this to work.") + raise RuntimeError( + "We have currently only implemented score for the hierarchical softmax scheme, " + "so you need to have run word2vec with hs=1 and negative=0 for this to work." + ) def worker_loop(): """Compute log probability for each sentence, lifting lists of sentences from the jobs queue.""" @@ -1081,15 +1105,14 @@ def worker_loop(): if (job_no - 1) * chunksize > total_sentences: logger.warning( "terminating after %i sentences (set higher total_sentences if you want more).", - total_sentences) + total_sentences + ) job_no -= 1 raise StopIteration() logger.debug("putting job #%i in the queue", job_no) job_queue.put(items) except StopIteration: - logger.info( - "reached end of input; waiting to finish %i outstanding jobs", - job_no - done_jobs + 1) + logger.info("reached end of input; waiting to finish %i outstanding jobs", job_no - done_jobs + 1) for _ in xrange(self.workers): job_queue.put(None) # give the workers heads up that they can finish -- no more work! push_done = True @@ -1102,7 +1125,8 @@ def worker_loop(): if elapsed >= next_report: logger.info( "PROGRESS: at %.2f%% sentences, %.0f sentences/s", - 100.0 * sentence_count, sentence_count / elapsed) + 100.0 * sentence_count, sentence_count / elapsed + ) next_report = elapsed + report_delay # don't flood log, wait report_delay seconds else: # loop ended by job count; really done @@ -1114,7 +1138,8 @@ def worker_loop(): self.clear_sims() logger.info( "scoring %i sentences took %.1fs, %.0f sentences/s", - sentence_count, elapsed, sentence_count / elapsed) + sentence_count, elapsed, sentence_count / elapsed + ) return sentence_scores[:sentence_count] def clear_sims(self): @@ -1141,9 +1166,10 @@ def update_weights(self): # Raise an error if an online update is run before initial training on a corpus if not len(self.wv.syn0): - raise RuntimeError("You cannot do an online vocabulary-update of a model which has no prior vocabulary. " - "First build the vocabulary of your model with a corpus " - "before doing an online update.") + raise RuntimeError( + "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " + "First build the vocabulary of your model with a corpus before doing an online update." + ) self.wv.syn0 = vstack([self.wv.syn0, newsyn0]) @@ -1192,16 +1218,16 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut training. Use 1.0 to allow further training updates of merged vectors. 
""" overlap_count = 0 - logger.info("loading projection weights from %s" % (fname)) + logger.info("loading projection weights from %s", fname) with utils.smart_open(fname) as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) - vocab_size, vector_size = map(int, header.split()) # throws for invalid file format + vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format if not vector_size == self.vector_size: raise ValueError("incompatible vector size %d in file %s" % (vector_size, fname)) # TOCONSIDER: maybe mismatched vectors still useful enough to merge (truncating/padding)? if binary: binary_len = dtype(REAL).itemsize * vector_size - for line_no in xrange(vocab_size): + for _ in xrange(vocab_size): # mixed text and binary: read text first, then binary word = [] while True: @@ -1220,13 +1246,13 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut for line_no, line in enumerate(fin): parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") if len(parts) != vector_size + 1: - raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no)) - word, weights = parts[0], list(map(REAL, parts[1:])) + raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no) + word, weights = parts[0], [REAL(x) for x in parts[1:]] if word in self.wv.vocab: overlap_count += 1 self.wv.syn0[self.wv.vocab[word].index] = weights self.syn0_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0 stops further changes - logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.wv.syn0.shape, fname)) + logger.info("merged %d vectors into %s matrix from %s", overlap_count, self.wv.syn0.shape, fname) def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): """ @@ -1301,9 +1327,10 @@ def n_similarity(self, ws1, ws2): def predict_output_word(self, context_words_list, topn=10): """Report the probability distribution of the center word given the context words as input to the trained model.""" if not self.negative: - raise RuntimeError("We have currently only implemented predict_output_word " - "for the negative sampling scheme, so you need to have " - "run word2vec with negative > 0 for this to work.") + raise RuntimeError( + "We have currently only implemented predict_output_word for the negative sampling scheme, " + "so you need to have run word2vec with negative > 0 for this to work." + ) if not hasattr(self.wv, 'syn0') or not hasattr(self, 'syn1neg'): raise RuntimeError("Parameters required for predicting the output words not found.") @@ -1344,8 +1371,10 @@ def estimate_memory(self, vocab_size=None, report=None): if self.negative: report['syn1neg'] = vocab_size * self.layer1_size * dtype(REAL).itemsize report['total'] = sum(report.values()) - logger.info("estimated required memory for %i words and %i dimensions: %i bytes", - vocab_size, self.vector_size, report['total']) + logger.info( + "estimated required memory for %i words and %i dimensions: %i bytes", + vocab_size, self.vector_size, report['total'] + ) return report @staticmethod @@ -1375,7 +1404,11 @@ def __str__(self): return "%s(vocab=%s, size=%s, alpha=%s)" % (self.__class__.__name__, len(self.wv.index2word), self.vector_size, self.alpha) def _minimize_model(self, save_syn1=False, save_syn1neg=False, save_syn0_lockf=False): - warnings.warn("This method would be deprecated in the future. 
Keep just_word_vectors = model.wv to retain just the KeyedVectors instance for read-only querying of word vectors.") + warnings.warn( + "This method would be deprecated in the future. " + "Keep just_word_vectors = model.wv to retain just the KeyedVectors instance " + "for read-only querying of word vectors." + ) if save_syn1 and save_syn1neg and save_syn0_lockf: return if hasattr(self, 'syn1') and not save_syn1: @@ -1581,19 +1614,19 @@ def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None): self.input_files = [self.source] # force code compatibility with list of files elif os.path.isdir(self.source): self.source = os.path.join(self.source, '') # ensures os-specific slash at end of path - logging.debug('reading directory ' + self.source) + logging.debug('reading directory %s', self.source) self.input_files = os.listdir(self.source) self.input_files = [self.source + file for file in self.input_files] # make full paths self.input_files.sort() # makes sure it happens in filename order else: # not a file or a directory, then we can't do anything with it raise ValueError('input is neither a file nor a path') - logging.info('files read into PathLineSentences:' + '\n'.join(self.input_files)) + logging.info('files read into PathLineSentences:%s', '\n'.join(self.input_files)) def __iter__(self): - '''iterate through the files''' + """iterate through the files""" for file_name in self.input_files: - logging.info('reading file ' + file_name) + logging.info('reading file %s', file_name) with utils.smart_open(file_name) as fin: for line in itertools.islice(fin, self.limit): line = utils.to_unicode(line).split() @@ -1649,7 +1682,8 @@ def __iter__(self): model = Word2Vec( corpus, size=args.size, min_count=args.min_count, workers=args.threads, window=args.window, sample=args.sample, sg=skipgram, hs=args.hs, - negative=args.negative, cbow_mean=1, iter=args.iter) + negative=args.negative, cbow_mean=1, iter=args.iter + ) if args.output: outfile = args.output diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index b60f158c65..fe988fc24c 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -579,7 +579,7 @@ cdef void score_pair_sg_hs( row2 = word_point[b] * size f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 - f = sgn*f + f *= sgn if f <= -MAX_EXP or f >= MAX_EXP: continue f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -673,7 +673,7 @@ cdef void score_pair_cbow_hs( row2 = word_point[b] * size f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 - f = sgn*f + f *= sgn if f <= -MAX_EXP or f >= MAX_EXP: continue f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index bc02663e04..1f450a457a 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -41,9 +41,9 @@ class DtmModel(utils.SaveLoad): """ - def __init__( - self, dtm_path, corpus=None, time_slices=None, mode='fit', model='dtm', num_topics=100, id2word=None, prefix=None, - lda_sequence_min_iter=6, lda_sequence_max_iter=20, lda_max_em_iter=10, alpha=0.01, top_chain_var=0.005, rng_seed=0, initialize_lda=True): + def __init__(self, dtm_path, corpus=None, time_slices=None, mode='fit', model='dtm', num_topics=100, + id2word=None, prefix=None, lda_sequence_min_iter=6, lda_sequence_max_iter=20, 
lda_max_em_iter=10, + alpha=0.01, top_chain_var=0.005, rng_seed=0, initialize_lda=True): """ `dtm_path` is path to the dtm executable, e.g. `C:/dtm/dtm-win64.exe`. @@ -88,7 +88,7 @@ def __init__( try: lencorpus = len(corpus) - except Exception: + except TypeError: logger.warning("input corpus stream has no len(); counting documents") lencorpus = sum(1 for _ in corpus) if lencorpus == 0: @@ -97,8 +97,10 @@ def __init__( raise ValueError("""There is a text without words in the input corpus. This breaks method='fixed' (The DIM model).""") if lencorpus != sum(time_slices): - raise ValueError("mismatched timeslices %{slices} for corpus of len {clen}".format( - slices=sum(time_slices), clen=lencorpus)) + raise ValueError( + "mismatched timeslices %{slices} for corpus of len {clen}" + .format(slices=sum(time_slices), clen=lencorpus) + ) self.lencorpus = lencorpus if prefix is None: rand_prefix = hex(random.randint(0, 0xffffff))[2:] + '_' @@ -171,7 +173,7 @@ def convert_input(self, corpus, time_slices): Serialize documents in LDA-C format to a temporary text file,. """ - logger.info("serializing temporary corpus to %s" % self.fcorpustxt()) + logger.info("serializing temporary corpus to %s", self.fcorpustxt()) # write out the corpus in a file format that DTM understands: corpora.BleiCorpus.save_corpus(self.fcorpustxt(), corpus) @@ -187,17 +189,25 @@ def train(self, corpus, time_slices, mode, model): """ self.convert_input(corpus, time_slices) - arguments = "--ntopics={p0} --model={mofrl} --mode={p1} --initialize_lda={p2} --corpus_prefix={p3} --outname={p4} --alpha={p5}".format( - p0=self.num_topics, mofrl=model, p1=mode, p2=self.initialize_lda, p3=self.fcorpus(), p4=self.foutname(), p5=self.alpha) + arguments = \ + "--ntopics={p0} --model={mofrl} --mode={p1} --initialize_lda={p2} --corpus_prefix={p3} " \ + "--outname={p4} --alpha={p5}".format( + p0=self.num_topics, mofrl=model, p1=mode, p2=self.initialize_lda, + p3=self.fcorpus(), p4=self.foutname(), p5=self.alpha + ) - params = "--lda_max_em_iter={p0} --lda_sequence_min_iter={p1} --lda_sequence_max_iter={p2} --top_chain_var={p3} --rng_seed={p4} ".format( - p0=self.lda_max_em_iter, p1=self.lda_sequence_min_iter, p2=self.lda_sequence_max_iter, p3=self.top_chain_var, p4=self.rng_seed) + params = \ + "--lda_max_em_iter={p0} --lda_sequence_min_iter={p1} --lda_sequence_max_iter={p2} " \ + "--top_chain_var={p3} --rng_seed={p4} ".format( + p0=self.lda_max_em_iter, p1=self.lda_sequence_min_iter, p2=self.lda_sequence_max_iter, + p3=self.top_chain_var, p4=self.rng_seed + ) arguments = arguments + " " + params - logger.info("training DTM with args %s" % arguments) + logger.info("training DTM with args %s", arguments) cmd = [self.dtm_path] + arguments.split() - logger.info("Running command %s" % cmd) + logger.info("Running command %s", cmd) check_output(args=cmd, stderr=PIPE) self.em_steps = np.loadtxt(self.fem_steps()) @@ -255,13 +265,6 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted else: num_topics = min(num_topics, self.num_topics) chosen_topics = range(num_topics) - # add a little random jitter, to randomize results around the same - # alpha - # sort_alpha = self.alpha + 0.0001 * \ - # numpy.random.rand(len(self.alpha)) - # sorted_topics = list(numpy.argsort(sort_alpha)) - # chosen_topics = sorted_topics[: topics / 2] + \ - # sorted_topics[-topics / 2:] if times < 0 or times >= len(self.time_slices): times = len(self.time_slices) @@ -278,9 +281,6 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, 
formatted else: topic = self.show_topic(i, time, num_words=num_words) shown.append(topic) - # if log: - # logger.info("topic #%i (%.3f): %s" % (i, self.alpha[i], - # topic)) return shown def show_topic(self, topicid, time, topn=50, num_words=None): @@ -290,25 +290,30 @@ def show_topic(self, topicid, time, topn=50, num_words=None): """ if num_words is not None: # deprecated num_words is used - logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.") - logger.warning("Please use topn instead.") + logger.warning( + "The parameter num_words for show_topic() would be deprecated in the updated version. " + "Please use topn instead." + ) topn = num_words topics = self.lambda_[:, :, time] topic = topics[topicid] - # liklihood to probability + # likelihood to probability topic = np.exp(topic) # normalize to probability dist topic = topic / topic.sum() # sort according to prob bestn = matutils.argsort(topic, topn, reverse=True) - beststr = [(topic[id], self.id2word[id]) for id in bestn] + beststr = [(topic[idx], self.id2word[idx]) for idx in bestn] return beststr def print_topic(self, topicid, time, topn=10, num_words=None): """Return the given topic, formatted as a string.""" if num_words is not None: # deprecated num_words is used - warnings.warn("The parameter num_words for print_topic() would be deprecated in the updated version. Please use topn instead.") + warnings.warn( + "The parameter num_words for print_topic() would be deprecated in the updated version. " + "Please use topn instead." + ) topn = num_words return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)]) @@ -320,7 +325,7 @@ def dtm_vis(self, corpus, time): input parameter is the year to do the visualisation. """ topic_term = np.exp(self.lambda_[:, :, time]) / np.exp(self.lambda_[:, :, time]).sum() - topic_term = topic_term * self.num_topics + topic_term *= self.num_topics doc_topic = self.gamma_ diff --git a/gensim/models/wrappers/fasttext.py b/gensim/models/wrappers/fasttext.py index 839cb46633..c2c8ee0688 100644 --- a/gensim/models/wrappers/fasttext.py +++ b/gensim/models/wrappers/fasttext.py @@ -19,7 +19,7 @@ Example: >>> from gensim.models.wrappers import FastText ->>> model = fasttext.FastText.train('/Users/kofola/fastText/fasttext', corpus_file='text8') +>>> model = FastText.train('/Users/kofola/fastText/fasttext', corpus_file='text8') >>> print model['forests'] # prints vector for given out-of-vocabulary word .. [1] https://github.com/facebookresearch/fastText#enriching-word-vectors-with-subword-information @@ -41,6 +41,11 @@ logger = logging.getLogger(__name__) +try: + FileNotFoundError +except NameError: + FileNotFoundError = IOError + FASTTEXT_FILEFORMAT_MAGIC = 793712314 @@ -146,7 +151,7 @@ def initialize_word_vectors(self): @classmethod def train(cls, ft_path, corpus_file, output_file=None, model='cbow', size=100, alpha=0.025, window=5, min_count=5, - word_ngrams=1, loss='ns', sample=1e-3, negative=5, iter=5, min_n=3, max_n=6, sorted_vocab=1, threads=12): + word_ngrams=1, loss='ns', sample=1e-3, negative=5, iter=5, min_n=3, max_n=6, sorted_vocab=1, threads=12): """ `ft_path` is the path to the FastText executable, e.g. `/home/kofola/fastText/fasttext`. 
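The `FileNotFoundError` alias introduced at the top of `fasttext.py` above is the usual Python 2/3 compatibility shim; a small self-contained sketch of the pattern (the file path below is hypothetical):

    # Python 3 ships FileNotFoundError; Python 2 only raises IOError for a missing file.
    # Binding the missing name to IOError lets one except-clause work under both versions.
    try:
        FileNotFoundError
    except NameError:  # Python 2: the builtin does not exist
        FileNotFoundError = IOError

    try:
        open('/tmp/does-not-exist.bin')
    except FileNotFoundError as err:
        print('model file missing: %s' % err)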
@@ -212,7 +217,7 @@ def train(cls, ft_path, corpus_file, output_file=None, model='cbow', size=100, a cmd.append("-%s" % option) cmd.append(str(value)) - output = utils.check_output(args=cmd) # noqa:F841 + utils.check_output(args=cmd) model = cls.load_fasttext_format(output_file) cls.delete_training_files(output_file) return model @@ -265,17 +270,17 @@ def load_model_params(self, file_handle): magic, version = self.struct_unpack(file_handle, '@2i') if magic == FASTTEXT_FILEFORMAT_MAGIC: # newer format self.new_format = True - dim, ws, epoch, minCount, neg, _, loss, model, bucket, minn, maxn, _, t = self.struct_unpack(file_handle, '@12i1d') + dim, ws, epoch, min_count, neg, _, loss, model, bucket, minn, maxn, _, t = self.struct_unpack(file_handle, '@12i1d') else: # older format self.new_format = False dim = magic ws = version - epoch, minCount, neg, _, loss, model, bucket, minn, maxn, _, t = self.struct_unpack(file_handle, '@10i1d') + epoch, min_count, neg, _, loss, model, bucket, minn, maxn, _, t = self.struct_unpack(file_handle, '@10i1d') # Parameters stored by [Args::save](https://github.com/facebookresearch/fastText/blob/master/src/args.cc) self.vector_size = dim self.window = ws self.iter = epoch - self.min_count = minCount + self.min_count = min_count self.negative = neg self.hs = loss == 1 self.sg = model == 2 @@ -307,7 +312,9 @@ def load_dict(self, file_handle, encoding='utf8'): # For more info : https://github.com/facebookresearch/fastText/issues/218 assert word == "__label__", ( - 'mismatched vocab_size ({}) and nwords ({}), extra word "{}"'.format(vocab_size, nwords, word)) + 'mismatched vocab_size ({}) and nwords ({}), extra word "{}"' + .format(vocab_size, nwords, word) + ) continue # don't add word to vocab self.wv.vocab[word] = Vocab(index=i, count=count) @@ -320,7 +327,8 @@ def load_dict(self, file_handle, encoding='utf8'): # expecting to log this warning only for pretrained french vector, wiki.fr logger.warning( "mismatch between final vocab size (%s words), and expected vocab size (%s words)", - len(self.wv.vocab), vocab_size) + len(self.wv.vocab), vocab_size + ) if self.new_format: for j in range(pruneidx_size): @@ -332,7 +340,9 @@ def load_vectors(self, file_handle): num_vectors, dim = self.struct_unpack(file_handle, '@2q') # Vectors stored by [Matrix::save](https://github.com/facebookresearch/fastText/blob/master/src/matrix.cc) assert self.vector_size == dim, ( - 'mismatch between vector size in model params ({}) and model vectors ({})'.format(self.vector_size, dim)) + 'mismatch between vector size in model params ({}) and model vectors ({})' + .format(self.vector_size, dim) + ) float_size = struct.calcsize('@f') if float_size == 4: dtype = np.dtype(np.float32) @@ -343,8 +353,10 @@ def load_vectors(self, file_handle): self.wv.syn0_all = np.fromfile(file_handle, dtype=dtype, count=num_vectors * dim) self.wv.syn0_all = self.wv.syn0_all.reshape((num_vectors, dim)) assert self.wv.syn0_all.shape == (self.bucket + len(self.wv.vocab), self.vector_size), \ - 'mismatch between actual weight matrix shape {} and expected shape {}'.format( - self.wv.syn0_all.shape, (self.bucket + len(self.wv.vocab), self.vector_size)) + 'mismatch between actual weight matrix shape {} and expected shape {}'\ + .format( + self.wv.syn0_all.shape, (self.bucket + len(self.wv.vocab), self.vector_size) + ) self.init_ngrams() @@ -378,7 +390,10 @@ def init_ngrams(self): ngram_weights = self.wv.syn0_all - logger.info("loading weights for %s words for fastText model from %s", len(self.wv.vocab), 
self.file_name) + logger.info( + "loading weights for %s words for fastText model from %s", + len(self.wv.vocab), self.file_name + ) for w, vocab in self.wv.vocab.items(): word_ngrams = self.compute_ngrams(w, self.wv.min_n, self.wv.max_n) @@ -386,7 +401,10 @@ def init_ngrams(self): self.wv.syn0[vocab.index] += np.array(ngram_weights[self.wv.ngrams[word_ngram]]) self.wv.syn0[vocab.index] /= (len(word_ngrams) + 1) - logger.info("loaded %s weight matrix for fastText model from %s", self.wv.syn0.shape, self.file_name) + logger.info( + "loaded %s weight matrix for fastText model from %s", + self.wv.syn0.shape, self.file_name + ) @staticmethod def compute_ngrams(word, min_n, max_n): diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py index a4e435810f..19c93e5f6c 100644 --- a/gensim/models/wrappers/ldamallet.py +++ b/gensim/models/wrappers/ldamallet.py @@ -143,7 +143,7 @@ def convert_input(self, corpus, infer=False, serialize_corpus=True): self.corpus2mallet(corpus, fout) # convert the text file above into MALLET's internal format - cmd = self.mallet_path + ' import-file --preserve-case --keep-sequence --remove-stopwords --token-regex "\S+" --input %s --output %s' + cmd = self.mallet_path + " import-file --preserve-case --keep-sequence --remove-stopwords --token-regex \"\S+\" --input %s --output %s" if infer: cmd += ' --use-pipe-from ' + self.fcorpusmallet() cmd = cmd % (self.fcorpustxt(), self.fcorpusmallet() + '.infer') @@ -158,8 +158,10 @@ def train(self, corpus): '--num-threads %s --output-state %s --output-doc-topics %s --output-topic-keys %s '\ '--num-iterations %s --inferencer-filename %s --doc-topics-threshold %s' cmd = cmd % ( - self.fcorpusmallet(), self.num_topics, self.alpha, self.optimize_interval, self.workers, - self.fstate(), self.fdoctopics(), self.ftopickeys(), self.iterations, self.finferencer(), self.topic_threshold) + self.fcorpusmallet(), self.num_topics, self.alpha, self.optimize_interval, + self.workers, self.fstate(), self.fdoctopics(), self.ftopickeys(), self.iterations, + self.finferencer(), self.topic_threshold + ) # NOTE "--keep-sequence-bigrams" / "--use-ngrams true" poorer results + runs out of memory logger.info("training MALLET LDA with %s", cmd) check_output(args=cmd, shell=True) @@ -176,7 +178,10 @@ def __getitem__(self, bow, iterations=100): self.convert_input(bow, infer=True) cmd = self.mallet_path + ' infer-topics --input %s --inferencer %s --output-doc-topics %s --num-iterations %s --doc-topics-threshold %s' - cmd = cmd % (self.fcorpusmallet() + '.infer', self.finferencer(), self.fdoctopics() + '.infer', iterations, self.topic_threshold) + cmd = cmd % ( + self.fcorpusmallet() + '.infer', self.finferencer(), + self.fdoctopics() + '.infer', iterations, self.topic_threshold + ) logger.info("inferring topics with MALLET LDA '%s'", cmd) check_output(args=cmd, shell=True) result = list(self.read_doctopics(self.fdoctopics() + '.infer')) @@ -255,13 +260,11 @@ def show_topic(self, topicid, topn=10, num_words=None): topn = num_words if self.word_topics is None: - logger.warning( - "Run train or load_word_topics before showing topics." 
- ) + logger.warning("Run train or load_word_topics before showing topics.") topic = self.word_topics[topicid] topic = topic / topic.sum() # normalize to probability dist bestn = matutils.argsort(topic, topn, reverse=True) - beststr = [(self.id2word[id], topic[id]) for id in bestn] + beststr = [(self.id2word[idx], topic[idx]) for idx in bestn] return beststr def get_version(self, direc_path): @@ -305,14 +308,9 @@ def read_doctopics(self, fname, eps=1e-6, renorm=True): # the MALLET doctopic format changed in 2.0.8 to exclude the id, # this handles the file differently dependent on the pattern if len(parts) == 2 * self.num_topics: - doc = [(id_, weight) - for id_, weight in zip(map(int, parts[::2]), - map(float, parts[1::2])) - if abs(weight) > eps] + doc = [(int(id_), float(weight)) for id_, weight in zip(*[iter(parts)] * 2) if abs(float(weight)) > eps] elif len(parts) == self.num_topics and mallet_version != '2.0.7': - doc = [(id_, weight) - for id_, weight in enumerate(map(float, parts)) - if abs(weight) > eps] + doc = [(id_, float(weight)) for id_, weight in enumerate(parts) if abs(float(weight)) > eps] else: if mallet_version == "2.0.7": """ @@ -375,6 +373,7 @@ def malletmodel2ldamodel(mallet_model, gamma_threshold=0.001, iterations=50): model_gensim = LdaModel( id2word=mallet_model.id2word, num_topics=mallet_model.num_topics, alpha=mallet_model.alpha, iterations=iterations, - gamma_threshold=gamma_threshold) + gamma_threshold=gamma_threshold + ) model_gensim.expElogbeta[:] = mallet_model.wordtopics return model_gensim diff --git a/gensim/models/wrappers/ldavowpalwabbit.py b/gensim/models/wrappers/ldavowpalwabbit.py index afa19c4327..ede5074b99 100644 --- a/gensim/models/wrappers/ldavowpalwabbit.py +++ b/gensim/models/wrappers/ldavowpalwabbit.py @@ -68,7 +68,7 @@ from gensim import utils, matutils from gensim.models.ldamodel import LdaModel -LOG = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class LdaVowpalWabbit(utils.SaveLoad): @@ -140,11 +140,10 @@ def __init__(self, vw_path, corpus=None, num_topics=100, id2word=None, if self.id2word is None: if corpus is None: - raise ValueError('at least one of corpus/id2word must be ' - 'specified, to establish input space ' - 'dimensionality') - LOG.warning('no word id mapping provided; initializing from ' - 'corpus, assuming identity') + raise ValueError( + "at least one of corpus/id2word must be specified, to establish input space dimensionality" + ) + logger.warning("no word id mapping provided; initializing from corpus, assuming identity") self.id2word = utils.dict_from_corpus(corpus) self.num_terms = len(self.id2word) elif len(self.id2word) > 0: @@ -153,8 +152,7 @@ def __init__(self, vw_path, corpus=None, num_topics=100, id2word=None, self.num_terms = 0 if self.num_terms == 0: - raise ValueError('cannot compute LDA over an empty collection ' - '(no terms)') + raise ValueError("cannot compute LDA over an empty collection (no terms)") # LDA parameters self.num_topics = num_topics @@ -186,7 +184,7 @@ def __init__(self, vw_path, corpus=None, num_topics=100, id2word=None, def train(self, corpus): """Clear any existing model state, and train on given corpus.""" - LOG.debug('Training new model from corpus') + logger.debug('Training new model from corpus') # reset any existing offset, model, or topics generated self.offset = self._initial_offset @@ -206,7 +204,7 @@ def update(self, corpus): if not os.path.exists(self._model_filename): return self.train(corpus) - LOG.debug('Updating exiting model from corpus') + 
logger.debug('Updating exiting model from corpus') # reset any existing topics generated self._topics = None @@ -228,12 +226,10 @@ def log_perplexity(self, chunk): vw_data = self._predict(chunk)[1] corpus_words = sum(cnt for document in chunk for _, cnt in document) bound = -vw_data['average_loss'] - LOG.info("%.3f per-word bound, %.1f perplexity estimate based on a " - "held-out corpus of %i documents with %i words", - bound, - numpy.exp2(-bound), - vw_data['corpus_size'], - corpus_words) + logger.info( + "%.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words", + bound, numpy.exp2(-bound), vw_data['corpus_size'], corpus_words + ) return bound def get_topics(self): @@ -267,7 +263,7 @@ def show_topics(self, num_topics=10, num_words=10, shown.append(topic) if log: - LOG.info("topic #%i (%.3f): %s", i, self.alpha, topic) + logger.info("topic #%i (%.3f): %s", i, self.alpha, topic) return shown @@ -287,12 +283,12 @@ def save(self, fname, *args, **kwargs): # Vowpal Wabbit uses its own binary model file, read this into # variable before serialising this object - keeps all data # self contained within a single serialised file - LOG.debug("Reading model bytes from '%s'", self._model_filename) + logger.debug("Reading model bytes from '%s'", self._model_filename) with utils.smart_open(self._model_filename, 'rb') as fhandle: self._model_data = fhandle.read() if os.path.exists(self._topics_filename): - LOG.debug("Reading topic bytes from '%s'", self._topics_filename) + logger.debug("Reading topic bytes from '%s'", self._topics_filename) with utils.smart_open(self._topics_filename, 'rb') as fhandle: self._topics_data = fhandle.read() @@ -310,13 +306,13 @@ def load(cls, fname, *args, **kwargs): if lda_vw._model_data: # Vowpal Wabbit operates on its own binary model file - deserialise # to file at load time, making it immediately ready for use - LOG.debug("Writing model bytes to '%s'", lda_vw._model_filename) + logger.debug("Writing model bytes to '%s'", lda_vw._model_filename) with utils.smart_open(lda_vw._model_filename, 'wb') as fhandle: fhandle.write(lda_vw._model_data) lda_vw._model_data = None # no need to keep in memory after this if lda_vw._topics_data: - LOG.debug("Writing topic bytes to '%s'", lda_vw._topics_filename) + logger.debug("Writing topic bytes to '%s'", lda_vw._topics_filename) with utils.smart_open(lda_vw._topics_filename, 'wb') as fhandle: fhandle.write(lda_vw._topics_data) lda_vw._topics_data = None @@ -326,23 +322,25 @@ def load(cls, fname, *args, **kwargs): def __del__(self): """Cleanup the temporary directory used by this wrapper.""" if self.cleanup_files and self.tmp_dir: - LOG.debug("Recursively deleting: %s", self.tmp_dir) + logger.debug("Recursively deleting: %s", self.tmp_dir) shutil.rmtree(self.tmp_dir) def _init_temp_dir(self, prefix='tmp'): """Create a working temporary directory with given prefix.""" self.tmp_dir = tempfile.mkdtemp(prefix=prefix) - LOG.info('using %s as temp dir', self.tmp_dir) + logger.info('using %s as temp dir', self.tmp_dir) def _get_vw_predict_command(self, corpus_size): """Get list of command line arguments for running prediction.""" - cmd = [self.vw_path, - '--testonly', # don't update model with this data - '--lda_D', str(corpus_size), - '-i', self._model_filename, # load existing binary model - '-d', self._corpus_filename, - '--learning_rate', '0', # possibly not needed, but harmless - '-p', self._predict_filename] + cmd = [ + self.vw_path, + '--testonly', # don't update model with this data + 
'--lda_D', str(corpus_size), + '-i', self._model_filename, # load existing binary model + '-d', self._corpus_filename, + '--learning_rate', '0', # possibly not needed, but harmless + '-p', self._predict_filename + ] if self.random_seed is not None: cmd.extend(['--random_seed', str(self.random_seed)]) @@ -355,27 +353,31 @@ def _get_vw_train_command(self, corpus_size, update=False): If 'update' is set to True, this specifies that we're further training an existing model. """ - cmd = [self.vw_path, - '-d', self._corpus_filename, - '--power_t', str(self.decay), - '--initial_t', str(self.offset), - '--minibatch', str(self.chunksize), - '--lda_D', str(corpus_size), - '--passes', str(self.passes), - '--cache_file', self._cache_filename, - '--lda_epsilon', str(self.gamma_threshold), - '--readable_model', self._topics_filename, - '-k', # clear cache - '-f', self._model_filename] + cmd = [ + self.vw_path, + '-d', self._corpus_filename, + '--power_t', str(self.decay), + '--initial_t', str(self.offset), + '--minibatch', str(self.chunksize), + '--lda_D', str(corpus_size), + '--passes', str(self.passes), + '--cache_file', self._cache_filename, + '--lda_epsilon', str(self.gamma_threshold), + '--readable_model', self._topics_filename, + '-k', # clear cache + '-f', self._model_filename + ] if update: cmd.extend(['-i', self._model_filename]) else: # these params are read from model file if updating - cmd.extend(['--lda', str(self.num_topics), - '-b', str(_bit_length(self.num_terms)), - '--lda_alpha', str(self.alpha), - '--lda_rho', str(self.eta)]) + cmd.extend([ + '--lda', str(self.num_topics), + '-b', str(_bit_length(self.num_terms)), + '--lda_alpha', str(self.alpha), + '--lda_rho', str(self.eta) + ]) if self.random_seed is not None: cmd.extend(['--random_seed', str(self.random_seed)]) @@ -393,8 +395,7 @@ def _load_vw_topics(self): of: ... """ - topics = numpy.zeros((self.num_topics, self.num_terms), - dtype=numpy.float32) + topics = numpy.zeros((self.num_topics, self.num_terms), dtype=numpy.float32) with utils.smart_open(self._topics_filename) as topics_file: found_data = False @@ -437,8 +438,7 @@ def _predict(self, chunk): vw_data = _parse_vw_output(_run_vw_command(cmd)) vw_data['corpus_size'] = corpus_size - predictions = numpy.zeros((corpus_size, self.num_topics), - dtype=numpy.float32) + predictions = numpy.zeros((corpus_size, self.num_topics), dtype=numpy.float32) with utils.smart_open(self._predict_filename) as fhandle: for i, line in enumerate(fhandle): @@ -524,7 +524,7 @@ def write_corpus_as_vw(corpus, filename): Returns the number of lines written. 
""" - LOG.debug("Writing corpus to: %s", filename) + logger.debug("Writing corpus to: %s", filename) corpus_size = 0 with utils.smart_open(filename, 'wb') as corpus_file: @@ -552,16 +552,14 @@ def _parse_vw_output(text): def _run_vw_command(cmd): """Execute given Vowpal Wabbit command, log stdout and stderr.""" - LOG.info("Running Vowpal Wabbit command: %s", ' '.join(cmd)) + logger.info("Running Vowpal Wabbit command: %s", ' '.join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = proc.communicate()[0].decode('utf-8') - LOG.debug("Vowpal Wabbit output: %s", output) + logger.debug("Vowpal Wabbit output: %s", output) if proc.returncode != 0: - raise subprocess.CalledProcessError(proc.returncode, - ' '.join(cmd), - output=output) + raise subprocess.CalledProcessError(proc.returncode, ' '.join(cmd), output=output) return output @@ -588,6 +586,7 @@ def vwmodel2ldamodel(vw_model, iterations=50): model_gensim = LdaModel( num_topics=vw_model.num_topics, id2word=vw_model.id2word, chunksize=vw_model.chunksize, passes=vw_model.passes, alpha=vw_model.alpha, eta=vw_model.eta, decay=vw_model.decay, - offset=vw_model.offset, iterations=iterations, gamma_threshold=vw_model.gamma_threshold) + offset=vw_model.offset, iterations=iterations, gamma_threshold=vw_model.gamma_threshold + ) model_gensim.expElogbeta[:] = vw_model._get_topics() return model_gensim diff --git a/gensim/models/wrappers/varembed.py b/gensim/models/wrappers/varembed.py index eab1e0217c..30bf859ec7 100644 --- a/gensim/models/wrappers/varembed.py +++ b/gensim/models/wrappers/varembed.py @@ -18,14 +18,10 @@ """ import logging -import sys - import numpy as np -from gensim.models.keyedvectors import KeyedVectors - -# utility fnc for pickling, common scipy operations etc from gensim import utils +from gensim.models.keyedvectors import KeyedVectors from gensim.models.word2vec import Vocab logger = logging.getLogger(__name__) @@ -56,25 +52,21 @@ def load_varembed_format(cls, vectors, morfessor_model=None): result = cls() if vectors is None: raise Exception("Please provide vectors binary to load varembed model") - D = utils.unpickle(vectors) - word_to_ix = D['word_to_ix'] - morpho_to_ix = D['morpho_to_ix'] - word_embeddings = D['word_embeddings'] - morpho_embeddings = D['morpheme_embeddings'] + d = utils.unpickle(vectors) + word_to_ix = d['word_to_ix'] + morpho_to_ix = d['morpho_to_ix'] + word_embeddings = d['word_embeddings'] + morpho_embeddings = d['morpheme_embeddings'] result.load_word_embeddings(word_embeddings, word_to_ix) if morfessor_model: - if sys.version_info >= (2, 7): # Morfessor is only supported for Python 2.7 and above. - try: - import morfessor - morfessor_model = morfessor.MorfessorIO().read_binary_model_file(morfessor_model) - result.add_morphemes_to_embeddings(morfessor_model, morpho_embeddings, morpho_to_ix) - except ImportError: - # Morfessor Package not found. - logger.error('Could not import morfessor. Not using morpheme embeddings') - raise ImportError('Could not import morfessor.') - else: - # Raise exception in Python 2.6 or earlier. - raise Exception('Using Morphemes requires Python 2.7 and above. Morfessor is not supported in python 2.6') + try: + import morfessor + morfessor_model = morfessor.MorfessorIO().read_binary_model_file(morfessor_model) + result.add_morphemes_to_embeddings(morfessor_model, morpho_embeddings, morpho_to_ix) + except ImportError: + # Morfessor Package not found. + logger.error('Could not import morfessor. 
Not using morpheme embeddings') + raise ImportError('Could not import morfessor.') logger.info('Loaded varembed model vectors from %s', vectors) return result @@ -105,6 +97,10 @@ def add_morphemes_to_embeddings(self, morfessor_model, morpho_embeddings, morpho """ for word in self.vocab: morpheme_embedding = np.array( - [morpho_embeddings[morpho_to_ix.get(m, -1)] for m in morfessor_model.viterbi_segment(word)[0]]).sum(axis=0) + [ + morpho_embeddings[morpho_to_ix.get(m, -1)] + for m in morfessor_model.viterbi_segment(word)[0] + ] + ).sum(axis=0) self.syn0[self.vocab[word].index] += morpheme_embedding logger.info("Added morphemes to word vectors") diff --git a/gensim/models/wrappers/wordrank.py b/gensim/models/wrappers/wordrank.py index 8426af1d82..c31cd28adc 100644 --- a/gensim/models/wrappers/wordrank.py +++ b/gensim/models/wrappers/wordrank.py @@ -96,13 +96,23 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, cooccurrence_shuf_file = os.path.join(meta_dir, 'wiki.toy') meta_file = os.path.join(meta_dir, 'meta') - cmd_vocab_count = [os.path.join(wr_path, 'glove', 'vocab_count'), '-min-count', str(min_count), '-max-vocab', str(max_vocab_size)] - cmd_cooccurence_count = [os.path.join(wr_path, 'glove', 'cooccur'), '-memory', str(memory), '-vocab-file', temp_vocab_file, '-window-size', str(window), '-symmetric', str(symmetric)] + cmd_vocab_count = [ + os.path.join(wr_path, 'glove', 'vocab_count'), + '-min-count', str(min_count), '-max-vocab', str(max_vocab_size) + ] + cmd_cooccurence_count = [ + os.path.join(wr_path, 'glove', 'cooccur'), '-memory', str(memory), + '-vocab-file', temp_vocab_file, '-window-size', str(window), '-symmetric', str(symmetric) + ] cmd_shuffle_cooccurences = [os.path.join(wr_path, 'glove', 'shuffle'), '-memory', str(memory)] cmd_del_vocab_freq = ['cut', '-d', " ", '-f', '1', temp_vocab_file] commands = [cmd_vocab_count, cmd_cooccurence_count, cmd_shuffle_cooccurences] - input_fnames = [os.path.join(meta_dir, os.path.split(corpus_file)[-1]), os.path.join(meta_dir, os.path.split(corpus_file)[-1]), cooccurrence_file] + input_fnames = [ + os.path.join(meta_dir, os.path.split(corpus_file)[-1]), + os.path.join(meta_dir, os.path.split(corpus_file)[-1]), + cooccurrence_file + ] output_fnames = [temp_vocab_file, cooccurrence_file, cooccurrence_shuf_file] logger.info("Prepare training data (%s) using glove code", ", ".join(input_fnames)) @@ -116,22 +126,24 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, utils.check_output(w, args=cmd_del_vocab_freq) with smart_open(vocab_file, 'rb') as f: - numwords = sum(1 for line in f) + numwords = sum(1 for _ in f) with smart_open(cooccurrence_shuf_file, 'rb') as f: - numlines = sum(1 for line in f) + numlines = sum(1 for _ in f) with smart_open(meta_file, 'wb') as f: - meta_info = "{0} {1}\n{2} {3}\n{4} {5}".format(numwords, numwords, numlines, cooccurrence_shuf_file.split('/')[-1], numwords, vocab_file.split('/')[-1]) + meta_info = "{0} {1}\n{2} {3}\n{4} {5}".format( + numwords, numwords, numlines, cooccurrence_shuf_file.split('/')[-1], + numwords, vocab_file.split('/')[-1] + ) f.write(meta_info.encode('utf-8')) if iter % dump_period == 0: iter += 1 else: logger.warning( - 'Resultant embedding will be from %d iterations rather than the input %d iterations, ' - 'as wordrank dumps the embedding only at dump_period intervals. 
' - 'Input an appropriate combination of parameters (iter, dump_period) such that ' - '"iter mod dump_period" is zero.', iter - (iter % dump_period), iter - ) + "Resultant embedding will be from %d iterations rather than the input %d iterations, as wordrank dumps the embedding only at dump_period intervals. " + "Input an appropriate combination of parameters (iter, dump_period) such that \"iter mod dump_period\" is zero.", + iter - (iter % dump_period), iter + ) wr_args = { 'path': meta_dir, @@ -151,20 +163,21 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, symmetric=1, } # run wordrank executable with wr_args - cmd = ['mpirun', '-np'] - cmd.append(str(np)) - cmd.append(os.path.join(wr_path, 'wordrank')) + cmd = ['mpirun', '-np', str(np), os.path.join(wr_path, 'wordrank')] for option, value in wr_args.items(): cmd.append('--%s' % option) cmd.append(str(value)) logger.info("Running wordrank binary") - output = utils.check_output(args=cmd) # noqa:F841 + utils.check_output(args=cmd) # use embeddings from max. iteration's dump max_iter_dump = iter - (iter % dump_period) os.rename('model_word_%d.txt' % max_iter_dump, os.path.join(model_dir, 'wordrank.words')) os.rename('model_context_%d.txt' % max_iter_dump, os.path.join(model_dir, 'wordrank.contexts')) - model = cls.load_wordrank_model(os.path.join(model_dir, 'wordrank.words'), vocab_file, os.path.join(model_dir, 'wordrank.contexts'), sorted_vocab, ensemble) + model = cls.load_wordrank_model( + os.path.join(model_dir, 'wordrank.words'), vocab_file, + os.path.join(model_dir, 'wordrank.contexts'), sorted_vocab, ensemble + ) if cleanup_files: rmtree(model_dir) diff --git a/gensim/nosy.py b/gensim/nosy.py index 2913e1e694..0606166449 100644 --- a/gensim/nosy.py +++ b/gensim/nosy.py @@ -27,7 +27,7 @@ DEFAULTARGS = '--with-color -exe' # -w tests' -def checkSum(): +def check_sum(): """ Return a long which can be used to know if any .py files have changed. 
""" @@ -44,10 +44,9 @@ def checkSum(): val = 0 try: while True: - if checkSum() != val: - val = checkSum() - os.system('%s %s %s' % (EXECUTABLE, DEFAULTARGS, - ' '.join(sys.argv[1:]))) + if check_sum() != val: + val = check_sum() + os.system('%s %s %s' % (EXECUTABLE, DEFAULTARGS, ' '.join(sys.argv[1:]))) print(datetime.datetime.now().__str__()) print('=' * 77) time.sleep(1) diff --git a/gensim/parsing/porter.py b/gensim/parsing/porter.py index a22b8b94d1..048e056418 100644 --- a/gensim/parsing/porter.py +++ b/gensim/parsing/porter.py @@ -363,10 +363,10 @@ def stem(self, w): return self.b[:self.k + 1] def stem_sentence(self, txt): - return " ".join(map(self.stem, txt.split())) + return " ".join(self.stem(x) for x in txt.split()) def stem_documents(self, docs): - return map(self.stem_sentence, docs) + return [self.stem_sentence(x) for x in docs] if __name__ == '__main__': diff --git a/gensim/parsing/preprocessing.py b/gensim/parsing/preprocessing.py index a92eb98656..ab25361f60 100644 --- a/gensim/parsing/preprocessing.py +++ b/gensim/parsing/preprocessing.py @@ -117,8 +117,11 @@ def stem_text(text): stem = stem_text -DEFAULT_FILTERS = [lambda x: x.lower(), strip_tags, strip_punctuation, strip_multiple_whitespaces, - strip_numeric, remove_stopwords, strip_short, stem_text] +DEFAULT_FILTERS = [ + lambda x: x.lower(), strip_tags, strip_punctuation, + strip_multiple_whitespaces, strip_numeric, + remove_stopwords, strip_short, stem_text +] def preprocess_string(s, filters=DEFAULT_FILTERS): diff --git a/gensim/scripts/glove2word2vec.py b/gensim/scripts/glove2word2vec.py index 4f13de4524..0667440f80 100644 --- a/gensim/scripts/glove2word2vec.py +++ b/gensim/scripts/glove2word2vec.py @@ -17,7 +17,6 @@ which contains the number of vectors and their dimensionality (two integers). 
""" -import os import sys import logging import argparse @@ -30,7 +29,7 @@ def get_glove_info(glove_file_name): """Return the number of vectors and dimensions in a file in GloVe format.""" with smart_open(glove_file_name) as f: - num_lines = sum(1 for line in f) + num_lines = sum(1 for _ in f) with smart_open(glove_file_name) as f: num_dims = len(f.readline().split()) - 1 return num_lines, num_dims @@ -53,19 +52,9 @@ def glove2word2vec(glove_input_file, word2vec_output_file): logging.root.setLevel(level=logging.INFO) logger.info("running %s", ' '.join(sys.argv)) - # check and process cmdline input - program = os.path.basename(sys.argv[0]) - if len(sys.argv) < 2: - print(globals()['__doc__'] % locals()) - sys.exit(1) - parser = argparse.ArgumentParser() - parser.add_argument( - "-i", "--input", required=True, - help="Input file, in gloVe format (read-only).") - parser.add_argument( - "-o", "--output", required=True, - help="Output file, in word2vec text format (will be overwritten).") + parser.add_argument("-i", "--input", required=True, help="Input file, in gloVe format (read-only).") + parser.add_argument("-o", "--output", required=True, help="Output file, in word2vec text format (will be overwritten).") args = parser.parse_args() # do the actual conversion diff --git a/gensim/scripts/make_wiki_online.py b/gensim/scripts/make_wiki_online.py index 66985a566e..37c437f3e1 100755 --- a/gensim/scripts/make_wiki_online.py +++ b/gensim/scripts/make_wiki_online.py @@ -55,7 +55,7 @@ logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logger.info("running %s" % ' '.join(sys.argv)) + logger.info("running %s", ' '.join(sys.argv)) # check and process input arguments if len(sys.argv) < 3: @@ -107,4 +107,4 @@ # ~4h; result file is 15GB! bzip2'ed down to 4.5GB MmCorpus.serialize(outp + '_tfidf.mm', tfidf[mm], progress_cnt=10000) - logger.info("finished running %s" % program) + logger.info("finished running %s", program) diff --git a/gensim/scripts/make_wiki_online_lemma.py b/gensim/scripts/make_wiki_online_lemma.py index 66985a566e..37c437f3e1 100755 --- a/gensim/scripts/make_wiki_online_lemma.py +++ b/gensim/scripts/make_wiki_online_lemma.py @@ -55,7 +55,7 @@ logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logger.info("running %s" % ' '.join(sys.argv)) + logger.info("running %s", ' '.join(sys.argv)) # check and process input arguments if len(sys.argv) < 3: @@ -107,4 +107,4 @@ # ~4h; result file is 15GB! bzip2'ed down to 4.5GB MmCorpus.serialize(outp + '_tfidf.mm', tfidf[mm], progress_cnt=10000) - logger.info("finished running %s" % program) + logger.info("finished running %s", program) diff --git a/gensim/scripts/make_wiki_online_nodebug.py b/gensim/scripts/make_wiki_online_nodebug.py index 66985a566e..37c437f3e1 100755 --- a/gensim/scripts/make_wiki_online_nodebug.py +++ b/gensim/scripts/make_wiki_online_nodebug.py @@ -55,7 +55,7 @@ logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logger.info("running %s" % ' '.join(sys.argv)) + logger.info("running %s", ' '.join(sys.argv)) # check and process input arguments if len(sys.argv) < 3: @@ -107,4 +107,4 @@ # ~4h; result file is 15GB! 
bzip2'ed down to 4.5GB MmCorpus.serialize(outp + '_tfidf.mm', tfidf[mm], progress_cnt=10000) - logger.info("finished running %s" % program) + logger.info("finished running %s", program) diff --git a/gensim/scripts/make_wikicorpus.py b/gensim/scripts/make_wikicorpus.py index 66985a566e..37c437f3e1 100755 --- a/gensim/scripts/make_wikicorpus.py +++ b/gensim/scripts/make_wikicorpus.py @@ -55,7 +55,7 @@ logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s') logging.root.setLevel(level=logging.INFO) - logger.info("running %s" % ' '.join(sys.argv)) + logger.info("running %s", ' '.join(sys.argv)) # check and process input arguments if len(sys.argv) < 3: @@ -107,4 +107,4 @@ # ~4h; result file is 15GB! bzip2'ed down to 4.5GB MmCorpus.serialize(outp + '_tfidf.mm', tfidf[mm], progress_cnt=10000) - logger.info("finished running %s" % program) + logger.info("finished running %s", program) diff --git a/gensim/scripts/word2vec2tensor.py b/gensim/scripts/word2vec2tensor.py index 88cab79d25..6bb1301a59 100644 --- a/gensim/scripts/word2vec2tensor.py +++ b/gensim/scripts/word2vec2tensor.py @@ -37,13 +37,13 @@ def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False): - ''' + """ Convert Word2Vec mode to 2D tensor TSV file and metadata file Args: - param1 (str): word2vec model file path - param2 (str): filename prefix - param2 (bool): set True to use a binary Word2Vec model, defaults to False - ''' + word2vec_model_path (str): word2vec model file path + tensor_filename (str): filename prefix + binary (bool): set True to use a binary Word2Vec model, defaults to False + """ model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model_path, binary=binary) outfiletsv = tensor_filename + '_tensor.tsv' outfiletsvmeta = tensor_filename + '_metadata.tsv' @@ -52,11 +52,11 @@ def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False): with open(outfiletsvmeta, 'w+') as file_metadata: for word in model.index2word: file_metadata.write(gensim.utils.to_utf8(word) + gensim.utils.to_utf8('\n')) - vector_row = '\t'.join(map(str, model[word])) + vector_row = '\t'.join(str(x) for x in model[word]) file_vector.write(vector_row + '\n') - logger.info("2D tensor file saved to %s" % outfiletsv) - logger.info("Tensor metadata file saved to %s" % outfiletsvmeta) + logger.info("2D tensor file saved to %s", outfiletsv) + logger.info("Tensor metadata file saved to %s", outfiletsvmeta) if __name__ == "__main__": @@ -64,24 +64,12 @@ def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False): logging.root.setLevel(level=logging.INFO) logger.info("running %s", ' '.join(sys.argv)) - # check and process cmdline input - program = os.path.basename(sys.argv[0]) - if len(sys.argv) < 2: - print(globals()['__doc__'] % locals()) - sys.exit(1) - parser = argparse.ArgumentParser() - parser.add_argument( - "-i", "--input", required=True, - help="Input word2vec model") - parser.add_argument( - "-o", "--output", required=True, - help="Output tensor file name prefix") - parser.add_argument("-b", "--binary", - required=False, - help="If word2vec model in binary format, set True, else False") + parser.add_argument("-i", "--input", required=True, help="Input word2vec model") + parser.add_argument("-o", "--output", required=True, help="Output tensor file name prefix") + parser.add_argument("-b", "--binary", required=False, help="If word2vec model in binary format, set True, else False") args = parser.parse_args() word2vec2tensor(args.input, args.output, args.binary) - logger.info("finished 
running %s", program) + logger.info("finished running %s", os.path.basename(sys.argv[0])) diff --git a/gensim/scripts/word2vec_standalone.py b/gensim/scripts/word2vec_standalone.py index 52baea6f4c..878e588613 100644 --- a/gensim/scripts/word2vec_standalone.py +++ b/gensim/scripts/word2vec_standalone.py @@ -61,17 +61,8 @@ if __name__ == "__main__": - logging.basicConfig( - format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', - level=logging.INFO) + logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', level=logging.INFO) logger.info("running %s", " ".join(sys.argv)) - - # check and process cmdline input - program = os.path.basename(sys.argv[0]) - if len(sys.argv) < 2: - print(globals()['__doc__'] % locals()) - sys.exit(1) - seterr(all='raise') # don't ignore numpy errors parser = argparse.ArgumentParser() @@ -107,7 +98,8 @@ model = Word2Vec( corpus, size=args.size, min_count=args.min_count, workers=args.threads, window=args.window, sample=args.sample, alpha=args.alpha, sg=skipgram, - hs=args.hs, negative=args.negative, cbow_mean=1, iter=args.iter) + hs=args.hs, negative=args.negative, cbow_mean=1, iter=args.iter + ) if args.output: outfile = args.output @@ -124,4 +116,4 @@ questions_file = args.accuracy model.accuracy(questions_file) - logger.info("finished running %s", program) + logger.info("finished running %s", os.path.basename(sys.argv[0])) diff --git a/gensim/similarities/docsim.py b/gensim/similarities/docsim.py index efe71159d3..5e93c6f8cf 100755 --- a/gensim/similarities/docsim.py +++ b/gensim/similarities/docsim.py @@ -105,7 +105,7 @@ def __getstate__(self): return result def __str__(self): - return ("%s Shard(%i documents in %s)" % (self.cls.__name__, len(self), self.fullname())) + return "%s Shard(%i documents in %s)" % (self.cls.__name__, len(self), self.fullname()) def get_index(self): if not hasattr(self, 'index'): @@ -209,8 +209,9 @@ def __len__(self): return len(self.fresh_docs) + sum([len(shard) for shard in self.shards]) def __str__(self): - return ("Similarity index with %i documents in %i shards (stored under %s)" % - (len(self), len(self.shards), self.output_prefix)) + return "Similarity index with %i documents in %i shards (stored under %s)" % ( + len(self), len(self.shards), self.output_prefix + ) def add_documents(self, corpus): """ @@ -262,8 +263,9 @@ def close_shard(self): # consider the shard sparse if its density is < 30% issparse = 0.3 > 1.0 * self.fresh_nnz / (len(self.fresh_docs) * self.num_features) if issparse: - index = SparseMatrixSimilarity(self.fresh_docs, num_terms=self.num_features, - num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz) + index = SparseMatrixSimilarity( + self.fresh_docs, num_terms=self.num_features, num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz + ) else: index = MatrixSimilarity(self.fresh_docs, num_features=self.num_features) logger.info("creating %s shard #%s", 'sparse' if issparse else 'dense', shardid) @@ -334,8 +336,7 @@ def __getitem__(self, query): # the following uses a lot of lazy evaluation and (optionally) parallel # processing, to improve query latency and minimize memory footprint. 
offsets = numpy.cumsum([0] + [len(shard) for shard in self.shards]) - convert = lambda doc, shard_no: [(doc_index + offsets[shard_no], sim) - for doc_index, sim in doc] + convert = lambda doc, shard_no: [(doc_index + offsets[shard_no], sim) for doc_index, sim in doc] is_corpus, query = utils.is_corpus(query) is_corpus = is_corpus or hasattr(query, 'ndim') and query.ndim > 1 and query.shape[0] > 1 if not is_corpus: @@ -370,8 +371,7 @@ def vector_by_id(self, docpos): if docpos < pos: break if not self.shards or docpos < 0 or docpos >= pos: - raise ValueError("invalid document position: %s (must be 0 <= x < %s)" % - (docpos, len(self))) + raise ValueError("invalid document position: %s (must be 0 <= x < %s)" % (docpos, len(self))) result = shard.get_document_id(docpos - pos + len(shard)) return result @@ -458,7 +458,6 @@ def destroy(self): for fname in glob.glob(self.output_prefix + '*'): logger.info("deleting %s", fname) os.remove(fname) -# endclass Similarity class MatrixSimilarity(interfaces.SimilarityABC): @@ -496,7 +495,10 @@ class for description of the other parameters. if corpus is not None: if self.num_features <= 0: - raise ValueError("cannot index a corpus with zero features (you must specify either `num_features` or a non-empty corpus in the constructor)") + raise ValueError( + "cannot index a corpus with zero features (you must specify either `num_features` " + "or a non-empty corpus in the constructor)" + ) logger.info("creating matrix with %i documents and %i features", corpus_len, num_features) self.index = numpy.empty(shape=(corpus_len, num_features), dtype=dtype) # iterate over corpus, populating the numpy index matrix with (normalized) @@ -535,7 +537,8 @@ def get_similarities(self, query): if is_corpus: query = numpy.asarray( [matutils.sparse2full(vec, self.num_features) for vec in query], - dtype=self.index.dtype) + dtype=self.index.dtype + ) else: if scipy.sparse.issparse(query): query = query.toarray() # convert sparse to dense @@ -553,7 +556,6 @@ def get_similarities(self, query): def __str__(self): return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.index.shape[1]) -# endclass MatrixSimilarity class WmdSimilarity(interfaces.SimilarityABC): @@ -638,7 +640,6 @@ def get_similarities(self, query): def __str__(self): return "%s<%i docs, %i features>" % (self.__class__.__name__, len(self), self.w2v_model.wv.syn0.shape[1]) -# endclass WmdSimilarity class SparseMatrixSimilarity(interfaces.SimilarityABC): @@ -689,7 +690,8 @@ def __init__(self, corpus, num_features=None, num_terms=None, num_docs=None, num matutils.unitvec(v)) for v in corpus) self.index = matutils.corpus2csc( corpus, num_terms=num_terms, num_docs=num_docs, num_nnz=num_nnz, - dtype=dtype, printprogress=10000).T + dtype=dtype, printprogress=10000 + ).T # convert to Compressed Sparse Row for efficient row slicing and multiplications self.index = self.index.tocsr() # currently no-op, CSC.T is already CSR @@ -736,4 +738,3 @@ def get_similarities(self, query): # otherwise, return a 2d matrix (#queries x #index) result = result.toarray().T return result -# endclass SparseMatrixSimilarity diff --git a/gensim/similarities/index.py b/gensim/similarities/index.py index d0ca879225..e9323f6998 100644 --- a/gensim/similarities/index.py +++ b/gensim/similarities/index.py @@ -17,7 +17,9 @@ try: from annoy import AnnoyIndex except ImportError: - raise ImportError("Annoy has not been installed, if you wish to use the annoy indexer, please run `pip install annoy`") + raise ImportError( + "Annoy has not 
been installed, if you wish to use the annoy indexer, please run `pip install annoy`" + ) class AnnoyIndexer(object): @@ -49,7 +51,8 @@ def load(self, fname): fname_dict = fname + '.d' if not (os.path.exists(fname) and os.path.exists(fname_dict)): raise IOError( - "Can't find index files '%s' and '%s' - Unable to restore AnnoyIndexer state." % (fname, fname_dict)) + "Can't find index files '%s' and '%s' - Unable to restore AnnoyIndexer state." % (fname, fname_dict) + ) else: with smart_open(fname_dict) as f: d = _pickle.loads(f.read()) diff --git a/gensim/sklearn_api/atmodel.py b/gensim/sklearn_api/atmodel.py index 0217350d3a..d3128243a6 100644 --- a/gensim/sklearn_api/atmodel.py +++ b/gensim/sklearn_api/atmodel.py @@ -23,10 +23,10 @@ class AuthorTopicTransformer(TransformerMixin, BaseEstimator): """ def __init__(self, num_topics=100, id2word=None, author2doc=None, doc2author=None, - chunksize=2000, passes=1, iterations=50, decay=0.5, offset=1.0, - alpha='symmetric', eta='symmetric', update_every=1, eval_every=10, - gamma_threshold=0.001, serialized=False, serialization_path=None, - minimum_probability=0.01, random_state=None): + chunksize=2000, passes=1, iterations=50, decay=0.5, offset=1.0, + alpha='symmetric', eta='symmetric', update_every=1, eval_every=10, + gamma_threshold=0.001, serialized=False, serialization_path=None, + minimum_probability=0.01, random_state=None): """ Sklearn wrapper for AuthorTopic model. See gensim.models.AuthorTopicModel for parameter details. """ @@ -55,11 +55,14 @@ def fit(self, X, y=None): Fit the model according to the given training data. Calls gensim.models.AuthorTopicModel """ - self.gensim_model = models.AuthorTopicModel(corpus=X, num_topics=self.num_topics, id2word=self.id2word, + self.gensim_model = models.AuthorTopicModel( + corpus=X, num_topics=self.num_topics, id2word=self.id2word, author2doc=self.author2doc, doc2author=self.doc2author, chunksize=self.chunksize, passes=self.passes, iterations=self.iterations, decay=self.decay, offset=self.offset, alpha=self.alpha, eta=self.eta, - update_every=self.update_every, eval_every=self.eval_every, gamma_threshold=self.gamma_threshold, serialized=self.serialized, - serialization_path=self.serialization_path, minimum_probability=self.minimum_probability, random_state=self.random_state) + update_every=self.update_every, eval_every=self.eval_every, gamma_threshold=self.gamma_threshold, + serialized=self.serialized, serialization_path=self.serialization_path, + minimum_probability=self.minimum_probability, random_state=self.random_state + ) return self def transform(self, author_names): @@ -69,7 +72,9 @@ def transform(self, author_names): """ # The input as array of array if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." + ) check = lambda x: [x] if not isinstance(x, list) else x author_names = check(author_names) @@ -88,11 +93,14 @@ def partial_fit(self, X, author2doc=None, doc2author=None): Train model over X. 
""" if self.gensim_model is None: - self.gensim_model = models.AuthorTopicModel(corpus=X, num_topics=self.num_topics, id2word=self.id2word, + self.gensim_model = models.AuthorTopicModel( + corpus=X, num_topics=self.num_topics, id2word=self.id2word, author2doc=self.author2doc, doc2author=self.doc2author, chunksize=self.chunksize, passes=self.passes, iterations=self.iterations, decay=self.decay, offset=self.offset, alpha=self.alpha, eta=self.eta, - update_every=self.update_every, eval_every=self.eval_every, gamma_threshold=self.gamma_threshold, serialized=self.serialized, - serialization_path=self.serialization_path, minimum_probability=self.minimum_probability, random_state=self.random_state) + update_every=self.update_every, eval_every=self.eval_every, gamma_threshold=self.gamma_threshold, + serialized=self.serialized, serialization_path=self.serialization_path, + minimum_probability=self.minimum_probability, random_state=self.random_state + ) self.gensim_model.update(corpus=X, author2doc=author2doc, doc2author=doc2author) return self diff --git a/gensim/sklearn_api/d2vmodel.py b/gensim/sklearn_api/d2vmodel.py index 05d496d9b1..14163f1600 100644 --- a/gensim/sklearn_api/d2vmodel.py +++ b/gensim/sklearn_api/d2vmodel.py @@ -22,13 +22,10 @@ class D2VTransformer(TransformerMixin, BaseEstimator): Base Doc2Vec module """ - def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, - dm_tag_count=1, docvecs=None, docvecs_mapfile=None, - comment=None, trim_rule=None, size=100, alpha=0.025, - window=5, min_count=5, max_vocab_size=None, sample=1e-3, - seed=1, workers=3, min_alpha=0.0001, hs=0, negative=5, - cbow_mean=1, hashfxn=hash, iter=5, sorted_vocab=1, - batch_words=10000): + def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, docvecs=None, + docvecs_mapfile=None, comment=None, trim_rule=None, size=100, alpha=0.025, window=5, min_count=5, + max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, hs=0, negative=5, cbow_mean=1, + hashfxn=hash, iter=5, sorted_vocab=1, batch_words=10000): """ Sklearn api for Doc2Vec model. See gensim.models.Doc2Vec and gensim.models.Word2Vec for parameter details. """ @@ -66,14 +63,16 @@ def fit(self, X, y=None): Fit the model according to the given training data. Calls gensim.models.Doc2Vec """ - self.gensim_model = models.Doc2Vec(documents=X, dm_mean=self.dm_mean, dm=self.dm, + self.gensim_model = models.Doc2Vec( + documents=X, dm_mean=self.dm_mean, dm=self.dm, dbow_words=self.dbow_words, dm_concat=self.dm_concat, dm_tag_count=self.dm_tag_count, docvecs=self.docvecs, docvecs_mapfile=self.docvecs_mapfile, comment=self.comment, trim_rule=self.trim_rule, size=self.size, alpha=self.alpha, window=self.window, min_count=self.min_count, max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed, workers=self.workers, min_alpha=self.min_alpha, hs=self.hs, negative=self.negative, cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, - iter=self.iter, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words) + iter=self.iter, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words + ) return self def transform(self, docs): @@ -83,7 +82,9 @@ def transform(self, docs): or a single document like : ['calculus', 'mathematical'] """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." 
+ ) # The input as array of array check = lambda x: [x] if isinstance(x[0], string_types) else x diff --git a/gensim/sklearn_api/hdp.py b/gensim/sklearn_api/hdp.py index 92265a5e8f..d1dcec01a5 100644 --- a/gensim/sklearn_api/hdp.py +++ b/gensim/sklearn_api/hdp.py @@ -23,10 +23,8 @@ class HdpTransformer(TransformerMixin, BaseEstimator): Base HDP module """ - def __init__(self, id2word, max_chunks=None, max_time=None, - chunksize=256, kappa=1.0, tau=64.0, K=15, T=150, alpha=1, - gamma=1, eta=0.01, scale=1.0, var_converge=0.0001, - outputdir=None, random_state=None): + def __init__(self, id2word, max_chunks=None, max_time=None, chunksize=256, kappa=1.0, tau=64.0, K=15, T=150, + alpha=1, gamma=1, eta=0.01, scale=1.0, var_converge=0.0001, outputdir=None, random_state=None): """ Sklearn api for HDP model. See gensim.models.HdpModel for parameter details. """ @@ -57,10 +55,12 @@ def fit(self, X, y=None): else: corpus = X - self.gensim_model = models.HdpModel(corpus=corpus, id2word=self.id2word, max_chunks=self.max_chunks, + self.gensim_model = models.HdpModel( + corpus=corpus, id2word=self.id2word, max_chunks=self.max_chunks, max_time=self.max_time, chunksize=self.chunksize, kappa=self.kappa, tau=self.tau, K=self.K, T=self.T, alpha=self.alpha, gamma=self.gamma, eta=self.eta, scale=self.scale, - var_converge=self.var_converge, outputdir=self.outputdir, random_state=self.random_state) + var_converge=self.var_converge, outputdir=self.outputdir, random_state=self.random_state + ) return self def transform(self, docs): @@ -72,7 +72,9 @@ def transform(self, docs): or a single document like : [(4, 1), (7, 1)] """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." 
+ ) # The input as array of array check = lambda x: [x] if isinstance(x[0], tuple) else x @@ -82,7 +84,7 @@ def transform(self, docs): max_num_topics = 0 for k, v in enumerate(docs): X[k] = self.gensim_model[v] - max_num_topics = max(max_num_topics, max(list(map(lambda x: x[0], X[k]))) + 1) + max_num_topics = max(max_num_topics, max(x[0] for x in X[k]) + 1) for k, v in enumerate(X): # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future @@ -99,10 +101,12 @@ def partial_fit(self, X): X = matutils.Sparse2Corpus(X) if self.gensim_model is None: - self.gensim_model = models.HdpModel(id2word=self.id2word, max_chunks=self.max_chunks, + self.gensim_model = models.HdpModel( + id2word=self.id2word, max_chunks=self.max_chunks, max_time=self.max_time, chunksize=self.chunksize, kappa=self.kappa, tau=self.tau, K=self.K, T=self.T, alpha=self.alpha, gamma=self.gamma, eta=self.eta, scale=self.scale, - var_converge=self.var_converge, outputdir=self.outputdir, random_state=self.random_state) + var_converge=self.var_converge, outputdir=self.outputdir, random_state=self.random_state + ) self.gensim_model.update(corpus=X) return self diff --git a/gensim/sklearn_api/ldamodel.py b/gensim/sklearn_api/ldamodel.py index 107353e2df..77d539e616 100644 --- a/gensim/sklearn_api/ldamodel.py +++ b/gensim/sklearn_api/ldamodel.py @@ -24,11 +24,9 @@ class LdaTransformer(TransformerMixin, BaseEstimator): Base LDA module """ - def __init__( - self, num_topics=100, id2word=None, chunksize=2000, passes=1, - update_every=1, alpha='symmetric', eta=None, decay=0.5, - offset=1.0, eval_every=10, iterations=50, gamma_threshold=0.001, - minimum_probability=0.01, random_state=None, scorer='perplexity'): + def __init__(self, num_topics=100, id2word=None, chunksize=2000, passes=1, update_every=1, alpha='symmetric', + eta=None, decay=0.5, offset=1.0, eval_every=10, iterations=50, gamma_threshold=0.001, + minimum_probability=0.01, random_state=None, scorer='perplexity'): """ Sklearn wrapper for LDA model. See gensim.model.LdaModel for parameter details. 
@@ -63,12 +61,14 @@ def fit(self, X, y=None): else: corpus = X - self.gensim_model = models.LdaModel(corpus=corpus, num_topics=self.num_topics, id2word=self.id2word, + self.gensim_model = models.LdaModel( + corpus=corpus, num_topics=self.num_topics, id2word=self.id2word, chunksize=self.chunksize, passes=self.passes, update_every=self.update_every, alpha=self.alpha, eta=self.eta, decay=self.decay, offset=self.offset, eval_every=self.eval_every, iterations=self.iterations, gamma_threshold=self.gamma_threshold, minimum_probability=self.minimum_probability, - random_state=self.random_state) + random_state=self.random_state + ) return self def transform(self, docs): @@ -109,11 +109,13 @@ def partial_fit(self, X): X = matutils.Sparse2Corpus(X) if self.gensim_model is None: - self.gensim_model = models.LdaModel(num_topics=self.num_topics, id2word=self.id2word, + self.gensim_model = models.LdaModel( + num_topics=self.num_topics, id2word=self.id2word, chunksize=self.chunksize, passes=self.passes, update_every=self.update_every, alpha=self.alpha, eta=self.eta, decay=self.decay, offset=self.offset, eval_every=self.eval_every, iterations=self.iterations, gamma_threshold=self.gamma_threshold, - minimum_probability=self.minimum_probability, random_state=self.random_state) + minimum_probability=self.minimum_probability, random_state=self.random_state + ) self.gensim_model.update(corpus=X) return self diff --git a/gensim/sklearn_api/ldaseqmodel.py b/gensim/sklearn_api/ldaseqmodel.py index 25b50bf95e..6b96d8d6fa 100644 --- a/gensim/sklearn_api/ldaseqmodel.py +++ b/gensim/sklearn_api/ldaseqmodel.py @@ -22,9 +22,9 @@ class LdaSeqTransformer(TransformerMixin, BaseEstimator): Base LdaSeq module """ - def __init__(self, time_slice=None, id2word=None, alphas=0.01, num_topics=10, - initialize='gensim', sstats=None, lda_model=None, obs_variance=0.5, chain_variance=0.005, passes=10, - random_state=None, lda_inference_max_iter=25, em_min_iter=6, em_max_iter=20, chunksize=100): + def __init__(self, time_slice=None, id2word=None, alphas=0.01, num_topics=10, initialize='gensim', sstats=None, + lda_model=None, obs_variance=0.5, chain_variance=0.005, passes=10, random_state=None, + lda_inference_max_iter=25, em_min_iter=6, em_max_iter=20, chunksize=100): """ Sklearn wrapper for LdaSeq model. See gensim.models.LdaSeqModel for parameter details. """ @@ -50,11 +50,13 @@ def fit(self, X, y=None): Fit the model according to the given training data. 
Calls gensim.models.LdaSeqModel """ - self.gensim_model = models.LdaSeqModel(corpus=X, time_slice=self.time_slice, id2word=self.id2word, + self.gensim_model = models.LdaSeqModel( + corpus=X, time_slice=self.time_slice, id2word=self.id2word, alphas=self.alphas, num_topics=self.num_topics, initialize=self.initialize, sstats=self.sstats, lda_model=self.lda_model, obs_variance=self.obs_variance, chain_variance=self.chain_variance, passes=self.passes, random_state=self.random_state, lda_inference_max_iter=self.lda_inference_max_iter, - em_min_iter=self.em_min_iter, em_max_iter=self.em_max_iter, chunksize=self.chunksize) + em_min_iter=self.em_min_iter, em_max_iter=self.em_max_iter, chunksize=self.chunksize + ) return self def transform(self, docs): diff --git a/gensim/sklearn_api/lsimodel.py b/gensim/sklearn_api/lsimodel.py index c44240b2ba..776af6f5da 100644 --- a/gensim/sklearn_api/lsimodel.py +++ b/gensim/sklearn_api/lsimodel.py @@ -24,8 +24,7 @@ class LsiTransformer(TransformerMixin, BaseEstimator): Base LSI module """ - def __init__(self, num_topics=200, id2word=None, chunksize=20000, - decay=1.0, onepass=True, power_iters=2, extra_samples=100): + def __init__(self, num_topics=200, id2word=None, chunksize=20000, decay=1.0, onepass=True, power_iters=2, extra_samples=100): """ Sklearn wrapper for LSI model. See gensim.model.LsiModel for parameter details. """ @@ -48,8 +47,10 @@ def fit(self, X, y=None): else: corpus = X - self.gensim_model = models.LsiModel(corpus=corpus, num_topics=self.num_topics, id2word=self.id2word, chunksize=self.chunksize, - decay=self.decay, onepass=self.onepass, power_iters=self.power_iters, extra_samples=self.extra_samples) + self.gensim_model = models.LsiModel( + corpus=corpus, num_topics=self.num_topics, id2word=self.id2word, chunksize=self.chunksize, + decay=self.decay, onepass=self.onepass, power_iters=self.power_iters, extra_samples=self.extra_samples + ) return self def transform(self, docs): @@ -61,7 +62,9 @@ def transform(self, docs): or a single document like : [(4, 1), (7, 1)] """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." 
+ ) # The input as array of array check = lambda x: [x] if isinstance(x[0], tuple) else x @@ -82,8 +85,10 @@ def partial_fit(self, X): X = matutils.Sparse2Corpus(X) if self.gensim_model is None: - self.gensim_model = models.LsiModel(num_topics=self.num_topics, id2word=self.id2word, chunksize=self.chunksize, - decay=self.decay, onepass=self.onepass, power_iters=self.power_iters, extra_samples=self.extra_samples) + self.gensim_model = models.LsiModel( + num_topics=self.num_topics, id2word=self.id2word, chunksize=self.chunksize, decay=self.decay, + onepass=self.onepass, power_iters=self.power_iters, extra_samples=self.extra_samples + ) self.gensim_model.add_documents(corpus=X) return self diff --git a/gensim/sklearn_api/phrases.py b/gensim/sklearn_api/phrases.py index 8a944f0235..ad00c51c0e 100644 --- a/gensim/sklearn_api/phrases.py +++ b/gensim/sklearn_api/phrases.py @@ -21,8 +21,7 @@ class PhrasesTransformer(TransformerMixin, BaseEstimator): Base Phrases module """ - def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000, - delimiter=b'_', progress_per=10000): + def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000, delimiter=b'_', progress_per=10000): """ Sklearn wrapper for Phrases model. """ @@ -37,8 +36,10 @@ def fit(self, X, y=None): """ Fit the model according to the given training data. """ - self.gensim_model = models.Phrases(sentences=X, min_count=self.min_count, threshold=self.threshold, - max_vocab_size=self.max_vocab_size, delimiter=self.delimiter, progress_per=self.progress_per) + self.gensim_model = models.Phrases( + sentences=X, min_count=self.min_count, threshold=self.threshold, + max_vocab_size=self.max_vocab_size, delimiter=self.delimiter, progress_per=self.progress_per + ) return self def transform(self, docs): @@ -61,8 +62,10 @@ def transform(self, docs): def partial_fit(self, X): if self.gensim_model is None: - self.gensim_model = models.Phrases(sentences=X, min_count=self.min_count, threshold=self.threshold, - max_vocab_size=self.max_vocab_size, delimiter=self.delimiter, progress_per=self.progress_per) + self.gensim_model = models.Phrases( + sentences=X, min_count=self.min_count, threshold=self.threshold, + max_vocab_size=self.max_vocab_size, delimiter=self.delimiter, progress_per=self.progress_per + ) self.gensim_model.add_vocab(X) return self diff --git a/gensim/sklearn_api/rpmodel.py b/gensim/sklearn_api/rpmodel.py index 8673c7d39e..62395e0bce 100644 --- a/gensim/sklearn_api/rpmodel.py +++ b/gensim/sklearn_api/rpmodel.py @@ -47,7 +47,9 @@ def transform(self, docs): or a single document like : [(0, 1.0), (1, 1.0), (2, 1.0)] """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." + ) # The input as array of array check = lambda x: [x] if isinstance(x[0], tuple) else x diff --git a/gensim/sklearn_api/text2bow.py b/gensim/sklearn_api/text2bow.py index e5a96e6551..6beb126d0d 100644 --- a/gensim/sklearn_api/text2bow.py +++ b/gensim/sklearn_api/text2bow.py @@ -34,7 +34,7 @@ def fit(self, X, y=None): """ Fit the model according to the given training data. 
""" - tokenized_docs = list(map(lambda x: list(self.tokenizer(x)), X)) + tokenized_docs = [list(self.tokenizer(x)) for x in X] self.gensim_model = Dictionary(documents=tokenized_docs, prune_at=self.prune_at) return self @@ -43,12 +43,14 @@ def transform(self, docs): Return the BOW format for the input documents. """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." + ) # input as python lists check = lambda x: [x] if isinstance(x, string_types) else x docs = check(docs) - tokenized_docs = list(map(lambda x: list(self.tokenizer(x)), docs)) + tokenized_docs = [list(self.tokenizer(x)) for x in docs] X = [[] for _ in range(0, len(tokenized_docs))] for k, v in enumerate(tokenized_docs): @@ -61,6 +63,6 @@ def partial_fit(self, X): if self.gensim_model is None: self.gensim_model = Dictionary(prune_at=self.prune_at) - tokenized_docs = list(map(lambda x: list(self.tokenizer(x)), X)) + tokenized_docs = [list(self.tokenizer(x)) for x in X] self.gensim_model.add_documents(tokenized_docs) return self diff --git a/gensim/sklearn_api/tfidf.py b/gensim/sklearn_api/tfidf.py index ca34af6b40..414c597dc1 100644 --- a/gensim/sklearn_api/tfidf.py +++ b/gensim/sklearn_api/tfidf.py @@ -21,8 +21,8 @@ class TfIdfTransformer(TransformerMixin, BaseEstimator): Base Tf-Idf module """ - def __init__(self, corpus=None, id2word=None, dictionary=None, - wlocal=gensim.utils.identity, wglobal=gensim.models.tfidfmodel.df2idf, normalize=True): + def __init__(self, id2word=None, dictionary=None, wlocal=gensim.utils.identity, + wglobal=gensim.models.tfidfmodel.df2idf, normalize=True): """ Sklearn wrapper for Tf-Idf model. """ @@ -46,7 +46,9 @@ def transform(self, docs): Return the transformed documents after multiplication with the tf-idf matrix. """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." + ) # input as python lists check = lambda x: [x] if isinstance(x[0], tuple) else x diff --git a/gensim/sklearn_api/w2vmodel.py b/gensim/sklearn_api/w2vmodel.py index 32d3e2ffa7..6ddea2eb90 100644 --- a/gensim/sklearn_api/w2vmodel.py +++ b/gensim/sklearn_api/w2vmodel.py @@ -23,10 +23,9 @@ class W2VTransformer(TransformerMixin, BaseEstimator): Base Word2Vec module """ - def __init__(self, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, - trim_rule=None, sorted_vocab=1, batch_words=10000): + def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=1e-3, seed=1, + workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, + trim_rule=None, sorted_vocab=1, batch_words=10000): """ Sklearn wrapper for Word2Vec model. See gensim.models.Word2Vec for parameter details. """ @@ -56,12 +55,14 @@ def fit(self, X, y=None): Fit the model according to the given training data. 
Calls gensim.models.Word2Vec """ - self.gensim_model = models.Word2Vec(sentences=X, size=self.size, alpha=self.alpha, + self.gensim_model = models.Word2Vec( + sentences=X, size=self.size, alpha=self.alpha, window=self.window, min_count=self.min_count, max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed, workers=self.workers, min_alpha=self.min_alpha, sg=self.sg, hs=self.hs, negative=self.negative, cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word, trim_rule=self.trim_rule, - sorted_vocab=self.sorted_vocab, batch_words=self.batch_words) + sorted_vocab=self.sorted_vocab, batch_words=self.batch_words + ) return self def transform(self, words): @@ -69,7 +70,9 @@ def transform(self, words): Return the word-vectors for the input list of words. """ if self.gensim_model is None: - raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + raise NotFittedError( + "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method." + ) # The input as array of array check = lambda x: [x] if isinstance(x, six.string_types) else x @@ -83,4 +86,7 @@ def transform(self, words): return np.reshape(np.array(X), (len(words), self.size)) def partial_fit(self, X): - raise NotImplementedError("'partial_fit' has not been implemented for W2VTransformer. However, the model can be updated with a fixed vocabulary using Gensim API call.") + raise NotImplementedError( + "'partial_fit' has not been implemented for W2VTransformer. " + "However, the model can be updated with a fixed vocabulary using Gensim API call." + ) diff --git a/gensim/summarization/bm25.py b/gensim/summarization/bm25.py index d634a32b54..1fb11a8d77 100644 --- a/gensim/summarization/bm25.py +++ b/gensim/summarization/bm25.py @@ -18,7 +18,7 @@ class BM25(object): def __init__(self, corpus): self.corpus_size = len(corpus) - self.avgdl = sum(map(lambda x: float(len(x)), corpus)) / self.corpus_size + self.avgdl = sum(float(len(x)) for x in corpus) / self.corpus_size self.corpus = corpus self.f = [] self.df = {} @@ -62,7 +62,7 @@ def get_scores(self, document, average_idf): def get_bm25_weights(corpus): bm25 = BM25(corpus) - average_idf = sum(map(lambda k: float(bm25.idf[k]), bm25.idf.keys())) / len(bm25.idf.keys()) + average_idf = sum(float(val) for val in bm25.idf.values()) / len(bm25.idf) weights = [] for doc in corpus: diff --git a/gensim/summarization/graph.py b/gensim/summarization/graph.py index 8424873e35..c35a59a25d 100644 --- a/gensim/summarization/graph.py +++ b/gensim/summarization/graph.py @@ -242,8 +242,7 @@ def del_edge(self, edge): self.del_edge_labeling((v, u)) def del_edge_labeling(self, edge): - keys = [edge] - keys.append(edge[::-1]) + keys = [edge, edge[::-1]] for key in keys: for mapping in [self.edge_properties, self.edge_attr]: diff --git a/gensim/summarization/keywords.py b/gensim/summarization/keywords.py index b24e6f1f04..1630c9389d 100644 --- a/gensim/summarization/keywords.py +++ b/gensim/summarization/keywords.py @@ -30,7 +30,7 @@ def _get_pos_filters(): return frozenset(INCLUDING_FILTER), frozenset(EXCLUDING_FILTER) -def _get_words_for_graph(tokens, pos_filter): +def _get_words_for_graph(tokens, pos_filter=None): if pos_filter is None: include_filters, exclude_filters = _get_pos_filters() else: @@ -97,7 +97,7 @@ def _process_text(graph, tokens, split_text): def _queue_iterator(queue): iterations = queue.qsize() - for i in xrange(iterations): + for _ in 
xrange(iterations): var = queue.get() yield var queue.put(var) @@ -197,7 +197,8 @@ def _format_results(_keywords, combined_keywords, split, scores): return "\n".join(combined_keywords) -def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter=['NN', 'JJ'], lemmatize=False, deacc=True): +def keywords(text, ratio=0.2, words=None, split=False, scores=False, pos_filter=('NN', 'JJ'), + lemmatize=False, deacc=True): # Gets a dict of word -> lemma text = to_unicode(text) tokens = _clean_text_by_word(text, deacc=deacc) diff --git a/gensim/summarization/summarizer.py b/gensim/summarization/summarizer.py index c067c23faf..3307a2280f 100644 --- a/gensim/summarization/summarizer.py +++ b/gensim/summarization/summarizer.py @@ -151,7 +151,7 @@ def summarize_corpus(corpus, ratio=0.2): # Warns the user if there are too few documents. if len(corpus) < INPUT_MIN_LENGTH: - logger.warning("Input corpus is expected to have at least " + str(INPUT_MIN_LENGTH) + " documents.") + logger.warning("Input corpus is expected to have at least %d documents.", INPUT_MIN_LENGTH) graph = _build_graph(hashable_corpus) _set_graph_edge_weights(graph) @@ -205,7 +205,7 @@ def summarize(text, ratio=0.2, word_count=None, split=False): # Warns if the text is too short. if len(sentences) < INPUT_MIN_LENGTH: - logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.") + logger.warning("Input text is expected to have at least %d sentences.", INPUT_MIN_LENGTH) corpus = _build_corpus(sentences) diff --git a/gensim/summarization/textcleaner.py b/gensim/summarization/textcleaner.py index 404c44b18e..fa6a56b887 100644 --- a/gensim/summarization/textcleaner.py +++ b/gensim/summarization/textcleaner.py @@ -97,7 +97,7 @@ def clean_text_by_word(text, deacc=True): else: tags = None units = merge_syntactic_units(original_words, filtered_words, tags) - return dict((unit.text, unit) for unit in units) + return {unit.text: unit for unit in units} def tokenize_by_word(text): diff --git a/gensim/test/basetests.py b/gensim/test/basetmtests.py similarity index 91% rename from gensim/test/basetests.py rename to gensim/test/basetmtests.py index a22bfe2d30..e8cb1d259d 100644 --- a/gensim/test/basetests.py +++ b/gensim/test/basetmtests.py @@ -13,27 +13,27 @@ class TestBaseTopicModel(object): - def testPrintTopic(self): + def test_print_topic(self): topics = self.model.show_topics(formatted=True) for topic_no, topic in topics: self.assertTrue(isinstance(topic_no, int)) self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) # noqa:F821 - def testPrintTopics(self): + def test_print_topics(self): topics = self.model.print_topics() for topic_no, topic in topics: self.assertTrue(isinstance(topic_no, int)) self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) # noqa:F821 - def testShowTopic(self): + def test_show_topic(self): topic = self.model.show_topic(1) for k, v in topic: self.assertTrue(isinstance(k, six.string_types)) self.assertTrue(isinstance(v, (np.floating, float))) - def testShowTopics(self): + def test_show_topics(self): topics = self.model.show_topics(formatted=False) for topic_no, topic in topics: @@ -43,7 +43,7 @@ def testShowTopics(self): self.assertTrue(isinstance(k, six.string_types)) self.assertTrue(isinstance(v, (np.floating, float))) - def testGetTopics(self): + def test_get_topics(self): topics = self.model.get_topics() vocab_size = len(self.model.id2word) for topic in topics: diff --git a/gensim/test/simspeed.py b/gensim/test/simspeed.py index 
4c1ffcab6f..7ba25fc2ea 100755 --- a/gensim/test/simspeed.py +++ b/gensim/test/simspeed.py @@ -27,7 +27,7 @@ if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - logging.info("running %s" % " ".join(sys.argv)) + logging.info("running %s", " ".join(sys.argv)) # check and process cmdline input program = os.path.basename(sys.argv[0]) @@ -54,8 +54,10 @@ # because it needs to convert sparse vecs to np arrays and normalize them to # unit length=extra work, which #3 avoids. query = list(itertools.islice(corpus_dense, 1000)) - logging.info("test 1 (dense): dense corpus of %i docs vs. index (%i documents, %i dense features)" % - (len(query), len(index_dense), index_dense.num_features)) + logging.info( + "test 1 (dense): dense corpus of %i docs vs. index (%i documents, %i dense features)", + len(query), len(index_dense), index_dense.num_features + ) for chunksize in [1, 4, 8, 16, 64, 128, 256, 512, 1024]: start = time() if chunksize > 1: @@ -68,13 +70,17 @@ assert len(sims) == len(query) # make sure we have one result for each query document taken = time() - start queries = math.ceil(1.0 * len(query) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(query) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(query) / taken, queries / taken + ) # Same comment as for test #1 but vs. test #4. query = list(itertools.islice(corpus_sparse, 1000)) - logging.info("test 2 (sparse): sparse corpus of %i docs vs. sparse index (%i documents, %i features, %.2f%% density)" % - (len(query), len(corpus_sparse), index_sparse.index.shape[1], density)) + logging.info( + "test 2 (sparse): sparse corpus of %i docs vs. sparse index (%i documents, %i features, %.2f%% density)", + len(query), len(corpus_sparse), index_sparse.index.shape[1], density + ) for chunksize in [1, 5, 10, 100, 500, 1000]: start = time() if chunksize > 1: @@ -87,28 +93,37 @@ assert len(sims) == len(query) # make sure we have one result for each query document taken = time() - start queries = math.ceil(1.0 * len(query) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(query) / taken, queries / taken)) - - logging.info("test 3 (dense): similarity of all vs. all (%i documents, %i dense features)" % - (len(corpus_dense), index_dense.num_features)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(query) / taken, queries / taken + ) + + logging.info( + "test 3 (dense): similarity of all vs. all (%i documents, %i dense features)", + len(corpus_dense), index_dense.num_features + ) for chunksize in [0, 1, 4, 8, 16, 64, 128, 256, 512, 1024]: index_dense.chunksize = chunksize start = time() # `sims` stores the entire N x N sim matrix in memory! 
# this is not necessary, but i added it to test the accuracy of the result # (=report mean diff below) - sims = [sim for sim in index_dense] + sims = list(index_dense) taken = time() - start sims = np.asarray(sims) if chunksize == 0: - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s)" % (chunksize, taken, len(corpus_dense) / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s)", + chunksize, taken, len(corpus_dense) / taken + ) unchunksizeed = sims else: queries = math.ceil(1.0 * len(corpus_dense) / chunksize) diff = np.mean(np.abs(unchunksizeed - sims)) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s), meandiff=%.3e" % - (chunksize, taken, len(corpus_dense) / taken, queries / taken, diff)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s), meandiff=%.3e", + chunksize, taken, len(corpus_dense) / taken, queries / taken, diff + ) del sims index_dense.num_best = 10 @@ -116,32 +131,41 @@ for chunksize in [0, 1, 4, 8, 16, 64, 128, 256, 512, 1024]: index_dense.chunksize = chunksize start = time() - sims = [sim for sim in index_dense] + sims = list(index_dense) taken = time() - start if chunksize == 0: queries = len(corpus_dense) else: queries = math.ceil(1.0 * len(corpus_dense) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(corpus_dense) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(corpus_dense) / taken, queries / taken + ) index_dense.num_best = None - logging.info("test 5 (sparse): similarity of all vs. all (%i documents, %i features, %.2f%% density)" % - (len(corpus_sparse), index_sparse.index.shape[1], density)) + logging.info( + "test 5 (sparse): similarity of all vs. 
all (%i documents, %i features, %.2f%% density)", + len(corpus_sparse), index_sparse.index.shape[1], density + ) for chunksize in [0, 5, 10, 100, 500, 1000, 5000]: index_sparse.chunksize = chunksize start = time() - sims = [sim for sim in index_sparse] + sims = list(index_sparse) taken = time() - start sims = np.asarray(sims) if chunksize == 0: - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s)" % (chunksize, taken, len(corpus_sparse) / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s)", + chunksize, taken, len(corpus_sparse) / taken + ) unchunksizeed = sims else: queries = math.ceil(1.0 * len(corpus_sparse) / chunksize) diff = np.mean(np.abs(unchunksizeed - sims)) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s), meandiff=%.3e" % - (chunksize, taken, len(corpus_sparse) / taken, queries / taken, diff)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s), meandiff=%.3e", + chunksize, taken, len(corpus_sparse) / taken, queries / taken, diff + ) del sims index_sparse.num_best = 10 @@ -149,14 +173,16 @@ for chunksize in [0, 5, 10, 100, 500, 1000, 5000]: index_sparse.chunksize = chunksize start = time() - sims = [sim for sim in index_sparse] + sims = list(index_sparse) taken = time() - start if chunksize == 0: queries = len(corpus_sparse) else: queries = math.ceil(1.0 * len(corpus_sparse) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(corpus_sparse) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(corpus_sparse) / taken, queries / taken + ) index_sparse.num_best = None - logging.info("finished running %s" % program) + logging.info("finished running %s", program) diff --git a/gensim/test/simspeed2.py b/gensim/test/simspeed2.py index 334730a6f1..931caef950 100755 --- a/gensim/test/simspeed2.py +++ b/gensim/test/simspeed2.py @@ -25,7 +25,7 @@ if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - logging.info("running %s" % " ".join(sys.argv)) + logging.info("running %s", " ".join(sys.argv)) # check and process cmdline input program = os.path.basename(sys.argv[0]) @@ -47,8 +47,10 @@ density = 100.0 * sum(shard.num_nnz for shard in index_sparse.shards) / (len(index_sparse) * sparse_features) - logging.info("test 1 (dense): similarity of all vs. all (%i documents, %i dense features)" % - (len(corpus_dense), index_dense.num_features)) + logging.info( + "test 1 (dense): similarity of all vs. 
all (%i documents, %i dense features)", + len(corpus_dense), index_dense.num_features + ) for chunksize in [1, 8, 32, 64, 128, 256, 512, 1024, index_dense.shardsize]: index_dense.chunksize = chunksize start = time() @@ -56,8 +58,10 @@ pass taken = time() - start queries = math.ceil(1.0 * len(corpus_dense) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(corpus_dense) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(corpus_dense) / taken, queries / taken + ) index_dense.num_best = 10 logging.info("test 2 (dense): as above, but only ask for the top-10 most similar for each document") @@ -67,12 +71,17 @@ sims = [sim for sim in index_dense] taken = time() - start queries = math.ceil(1.0 * len(corpus_dense) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(corpus_dense) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(corpus_dense) / taken, queries / taken + ) index_dense.num_best = None - logging.info("test 3 (sparse): similarity of all vs. all (%i documents, %i features, %.2f%% density)" % - (len(corpus_sparse), index_sparse.num_features, density)) + logging.info( + "test 3 (sparse): similarity of all vs. all (%i documents, %i features, %.2f%% density)", + len(corpus_sparse), index_sparse.num_features, density + ) + for chunksize in [1, 5, 10, 100, 256, 500, 1000, index_sparse.shardsize]: index_sparse.chunksize = chunksize start = time() @@ -80,8 +89,10 @@ pass taken = time() - start queries = math.ceil(1.0 * len(corpus_sparse) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(corpus_sparse) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(corpus_sparse) / taken, queries / taken + ) index_sparse.num_best = 10 logging.info("test 4 (sparse): as above, but only ask for the top-10 most similar for each document") @@ -92,8 +103,10 @@ pass taken = time() - start queries = math.ceil(1.0 * len(corpus_sparse) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(corpus_sparse) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(corpus_sparse) / taken, queries / taken + ) index_sparse.num_best = None # Difference between test #5 and test #1 is that the query in #5 is a gensim iterable @@ -101,8 +114,10 @@ # because it needs to convert sparse vecs to numpy arrays and normalize them to # unit length=extra work, which #1 avoids. query = list(itertools.islice(corpus_dense, 1000)) - logging.info("test 5 (dense): dense corpus of %i docs vs. index (%i documents, %i dense features)" % - (len(query), len(index_dense), index_dense.num_features)) + logging.info( + "test 5 (dense): dense corpus of %i docs vs. 
index (%i documents, %i dense features)", + len(query), len(index_dense), index_dense.num_features + ) for chunksize in [1, 8, 32, 64, 128, 256, 512, 1024]: start = time() if chunksize > 1: @@ -114,13 +129,17 @@ _ = index_dense[vec] taken = time() - start queries = math.ceil(1.0 * len(query) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(query) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(query) / taken, queries / taken + ) # Same comment as for test #5. query = list(itertools.islice(corpus_dense, 1000)) - logging.info("test 6 (sparse): sparse corpus of %i docs vs. sparse index (%i documents, %i features, %.2f%% density)" % - (len(query), len(corpus_sparse), index_sparse.num_features, density)) + logging.info( + "test 6 (sparse): sparse corpus of %i docs vs. sparse index (%i documents, %i features, %.2f%% density)", + len(query), len(corpus_sparse), index_sparse.num_features, density + ) for chunksize in [1, 5, 10, 100, 500, 1000]: start = time() if chunksize > 1: @@ -132,7 +151,9 @@ _ = index_sparse[vec] taken = time() - start queries = math.ceil(1.0 * len(query) / chunksize) - logging.info("chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)" % - (chunksize, taken, len(query) / taken, queries / taken)) + logging.info( + "chunksize=%i, time=%.4fs (%.2f docs/s, %.2f queries/s)", + chunksize, taken, len(query) / taken, queries / taken + ) - logging.info("finished running %s" % program) + logging.info("finished running %s", program) diff --git a/gensim/test/svd_error.py b/gensim/test/svd_error.py index 4f204c1147..e6ab11bb78 100755 --- a/gensim/test/svd_error.py +++ b/gensim/test/svd_error.py @@ -51,7 +51,7 @@ def norm2(a): """Spectral norm ("norm 2") of a symmetric matrix `a`.""" if COMPUTE_NORM2: - logging.info("computing spectral norm of a %s matrix" % str(a.shape)) + logging.info("computing spectral norm of a %s matrix", str(a.shape)) return scipy.linalg.eigvalsh(a).max() # much faster than np.linalg.norm(2) else: return np.nan @@ -65,8 +65,10 @@ def print_error(name, aat, u, s, ideal_nf, ideal_n2): err = -np.dot(u, np.dot(np.diag(s), u.T)) err += aat nf, n2 = np.linalg.norm(err), norm2(err) - print('%s error: norm_frobenius=%f (/ideal=%g), norm2=%f (/ideal=%g), RMSE=%g' % - (name, nf, nf / ideal_nf, n2, n2 / ideal_n2, rmse(err))) + print( + '%s error: norm_frobenius=%f (/ideal=%g), norm2=%f (/ideal=%g), RMSE=%g' % + (name, nf, nf / ideal_nf, n2, n2 / ideal_n2, rmse(err)) + ) sys.stdout.flush() @@ -82,7 +84,7 @@ def __iter__(self): if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) - logging.info("running %s" % " ".join(sys.argv)) + logging.info("running %s", " ".join(sys.argv)) program = os.path.basename(sys.argv[0]) # do we have enough cmd line arguments? 
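The hunks in these benchmark and test scripts swap eager `%` interpolation inside logging calls for lazy argument passing. A minimal sketch of the difference, using only the standard-library `logging` module (the logger name and values below are illustrative, not taken from gensim):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

n_docs = 1000  # illustrative value

# Eager: the message is formatted before logger.info() is even called,
# so the interpolation work happens even when INFO records are filtered out.
logger.info("processed %i documents" % n_docs)

# Lazy: logging formats the message only if a handler actually emits the
# record, which is the style these hunks convert to.
logger.info("processed %i documents", n_docs)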
@@ -105,7 +107,7 @@ def __iter__(self): m = int(sys.argv[3]) else: m = mm.num_terms - logging.info("using %i documents and %i features" % (n, m)) + logging.info("using %i documents and %i features", n, m) corpus = ClippedCorpus(mm, n, m) id2word = gensim.utils.FakeDict(m) @@ -137,7 +139,7 @@ def __iter__(self): print_error("baseline", aat, np.zeros((m, factors)), np.zeros((factors)), ideal_fro, ideal_n2) if sparsesvd: - logging.info("computing SVDLIBC SVD for %i factors" % (factors)) + logging.info("computing SVDLIBC SVD for %i factors", factors) taken = time.time() corpus_ram = gensim.matutils.corpus2csc(corpus, num_terms=m) ut, s, vt = sparsesvd(corpus_ram, factors) @@ -152,30 +154,41 @@ def __iter__(self): del u for power_iters in POWER_ITERS: for chunksize in CHUNKSIZE: - logging.info("computing incremental SVD for %i factors, %i power iterations, chunksize %i" % - (factors, power_iters, chunksize)) + logging.info( + "computing incremental SVD for %i factors, %i power iterations, chunksize %i", + factors, power_iters, chunksize + ) taken = time.time() gensim.models.lsimodel.P2_EXTRA_ITERS = power_iters - model = gensim.models.LsiModel(corpus, id2word=id2word, num_topics=factors, - chunksize=chunksize, power_iters=power_iters) + model = gensim.models.LsiModel( + corpus, id2word=id2word, num_topics=factors, + chunksize=chunksize, power_iters=power_iters + ) taken = time.time() - taken u, s = model.projection.u.astype(np.float32), model.projection.s.astype(np.float32)**2 del model - print("incremental SVD for %i factors, %i power iterations, chunksize %i took %s s (spectrum %f .. %f)" % - (factors, power_iters, chunksize, taken, s[0], s[-1])) + print( + "incremental SVD for %i factors, %i power iterations, " + "chunksize %i took %s s (spectrum %f .. %f)" % + (factors, power_iters, chunksize, taken, s[0], s[-1]) + ) print_error('incremental SVD', aat, u, s, ideal_fro, ideal_n2) del u - logging.info("computing multipass SVD for %i factors, %i power iterations" % - (factors, power_iters,)) + logging.info("computing multipass SVD for %i factors, %i power iterations", factors, power_iters) taken = time.time() - model = gensim.models.LsiModel(corpus, id2word=id2word, num_topics=factors, chunksize=2000, - onepass=False, power_iters=power_iters) + model = gensim.models.LsiModel( + corpus, id2word=id2word, num_topics=factors, chunksize=2000, + onepass=False, power_iters=power_iters + ) taken = time.time() - taken u, s = model.projection.u.astype(np.float32), model.projection.s.astype(np.float32)**2 del model - print("multipass SVD for %i factors, %i power iterations took %s s (spectrum %f .. %f)" % - (factors, power_iters, taken, s[0], s[-1])) + print( + "multipass SVD for %i factors, " + "%i power iterations took %s s (spectrum %f .. 
%f)" % + (factors, power_iters, taken, s[0], s[-1]) + ) print_error('multipass SVD', aat, u, s, ideal_fro, ideal_n2) del u - logging.info("finished running %s" % program) + logging.info("finished running %s", program) diff --git a/gensim/test/test_atmodel.py b/gensim/test/test_atmodel.py index 17cf1619f9..97bffde623 100644 --- a/gensim/test/test_atmodel.py +++ b/gensim/test/test_atmodel.py @@ -26,7 +26,7 @@ from gensim.corpora import mmcorpus, Dictionary from gensim.models import atmodel from gensim import matutils -from gensim.test import basetests +from gensim.test import basetmtests # TODO: # Test that computing the bound on new unseen documents works as expected (this is somewhat different @@ -40,23 +40,38 @@ datapath = lambda fname: os.path.join(module_path, 'test_data', fname) # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] # Assign some authors randomly to the documents above. -author2doc = {'john': [0, 1, 2, 3, 4, 5, 6], 'jane': [2, 3, 4, 5, 6, 7, 8], 'jack': [0, 2, 4, 6, 8], 'jill': [1, 3, 5, 7]} -doc2author = {0: ['john', 'jack'], 1: ['john', 'jill'], 2: ['john', 'jane', 'jack'], 3: ['john', 'jane', 'jill'], - 4: ['john', 'jane', 'jack'], 5: ['john', 'jane', 'jill'], 6: ['john', 'jane', 'jack'], 7: ['jane', 'jill'], - 8: ['jane', 'jack']} +author2doc = { + 'john': [0, 1, 2, 3, 4, 5, 6], + 'jane': [2, 3, 4, 5, 6, 7, 8], + 'jack': [0, 2, 4, 6, 8], + 'jill': [1, 3, 5, 7] +} +doc2author = { + 0: ['john', 'jack'], + 1: ['john', 'jill'], + 2: ['john', 'jane', 'jack'], + 3: ['john', 'jane', 'jill'], + 4: ['john', 'jane', 'jack'], + 5: ['john', 'jane', 'jill'], + 6: ['john', 'jane', 'jack'], + 7: ['jane', 'jill'], + 8: ['jane', 'jack'] +} # More data with new and old authors (to test update method). 
# Although the text is just a subset of the previous, the model @@ -73,7 +88,7 @@ def testfile(test_fname=''): return os.path.join(tempfile.gettempdir(), fname) -class TestAuthorTopicModel(unittest.TestCase, basetests.TestBaseTopicModel): +class TestAuthorTopicModel(unittest.TestCase, basetmtests.TestBaseTopicModel): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) self.class_ = atmodel.AuthorTopicModel @@ -101,8 +116,10 @@ def testTransform(self): passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering if passed: break - logging.warning("Author-topic model failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) + logging.warning( + "Author-topic model failed to converge on attempt %i (got %s, expected %s)", + i, sorted(vec), sorted(expected) + ) self.assertTrue(passed) def testBasic(self): @@ -117,8 +134,14 @@ def testBasic(self): def testAuthor2docMissing(self): # Check that the results are the same if author2doc is constructed automatically from doc2author. - model = self.class_(corpus, author2doc=author2doc, doc2author=doc2author, id2word=dictionary, num_topics=2, random_state=0) - model2 = self.class_(corpus, doc2author=doc2author, id2word=dictionary, num_topics=2, random_state=0) + model = self.class_( + corpus, author2doc=author2doc, doc2author=doc2author, + id2word=dictionary, num_topics=2, random_state=0 + ) + model2 = self.class_( + corpus, doc2author=doc2author, id2word=dictionary, + num_topics=2, random_state=0 + ) # Compare Jill's topics before in both models. jill_topics = model.get_author_topics('jill') @@ -129,8 +152,14 @@ def testAuthor2docMissing(self): def testDoc2authorMissing(self): # Check that the results are the same if doc2author is constructed automatically from author2doc. - model = self.class_(corpus, author2doc=author2doc, doc2author=doc2author, id2word=dictionary, num_topics=2, random_state=0) - model2 = self.class_(corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, random_state=0) + model = self.class_( + corpus, author2doc=author2doc, doc2author=doc2author, + id2word=dictionary, num_topics=2, random_state=0 + ) + model2 = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, + num_topics=2, random_state=0 + ) # Compare Jill's topics before in both models. jill_topics = model.get_author_topics('jill') @@ -185,7 +214,10 @@ def testUpdateNewDataNewAuthor(self): def testSerialized(self): # Test the model using serialized corpora. Basic tests, plus test of update functionality. 
- model = self.class_(self.corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, serialized=True, serialization_path=datapath('testcorpus_serialization.mm')) + model = self.class_( + self.corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, + serialized=True, serialization_path=datapath('testcorpus_serialization.mm') + ) jill_topics = model.get_author_topics('jill') jill_topics = matutils.sparse2full(jill_topics, model.num_topics) @@ -216,7 +248,10 @@ def testTransformSerialized(self): # better random initialization for i in range(25): # restart at most 5 times # create the transformation model - model = self.class_(id2word=dictionary, num_topics=2, passes=100, random_state=0, serialized=True, serialization_path=datapath('testcorpus_serialization.mm')) + model = self.class_( + id2word=dictionary, num_topics=2, passes=100, random_state=0, + serialized=True, serialization_path=datapath('testcorpus_serialization.mm') + ) model.update(self.corpus, author2doc) jill_topics = model.get_author_topics('jill') @@ -234,13 +269,21 @@ def testTransformSerialized(self): remove(datapath('testcorpus_serialization.mm')) if passed: break - logging.warning("Author-topic model failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) + logging.warning( + "Author-topic model failed to converge on attempt %i (got %s, expected %s)", + i, sorted(vec), sorted(expected) + ) self.assertTrue(passed) def testAlphaAuto(self): - model1 = self.class_(corpus, author2doc=author2doc, id2word=dictionary, alpha='symmetric', passes=10, num_topics=2) - modelauto = self.class_(corpus, author2doc=author2doc, id2word=dictionary, alpha='auto', passes=10, num_topics=2) + model1 = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, + alpha='symmetric', passes=10, num_topics=2 + ) + modelauto = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, + alpha='auto', passes=10, num_topics=2 + ) # did we learn something? self.assertFalse(all(np.equal(model1.alpha, modelauto.alpha))) @@ -301,8 +344,14 @@ def testAlpha(self): self.assertRaises(ValueError, self.class_, **kwargs) def testEtaAuto(self): - model1 = self.class_(corpus, author2doc=author2doc, id2word=dictionary, eta='symmetric', passes=10, num_topics=2) - modelauto = self.class_(corpus, author2doc=author2doc, id2word=dictionary, eta='auto', passes=10, num_topics=2) + model1 = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, + eta='symmetric', passes=10, num_topics=2 + ) + modelauto = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, + eta='auto', passes=10, num_topics=2 + ) # did we learn something? 
self.assertFalse(all(np.equal(model1.eta, modelauto.eta))) @@ -388,7 +437,10 @@ def testGetTopicTerms(self): def testGetAuthorTopics(self): - model = self.class_(corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, passes=100, random_state=np.random.seed(0)) + model = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, + passes=100, random_state=np.random.seed(0) + ) author_topics = [] for a in model.id2author.values(): @@ -402,7 +454,10 @@ def testGetAuthorTopics(self): def testTermTopics(self): - model = self.class_(corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, passes=100, random_state=np.random.seed(0)) + model = self.class_( + corpus, author2doc=author2doc, id2word=dictionary, num_topics=2, + passes=100, random_state=np.random.seed(0) + ) # check with word_type result = model.get_term_topics(2) @@ -436,7 +491,7 @@ def testPasses(self): for test_rhot in test_rhots: model.update(corpus, author2doc) - msg = ", ".join(map(str, [passes, model.num_updates, model.state.numdocs])) + msg = "{}, {}, {}".format(passes, model.num_updates, model.state.numdocs) self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg) self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots)) diff --git a/gensim/test/test_big.py b/gensim/test/test_big.py index 5e6972bd1f..abf19c63c7 100644 --- a/gensim/test/test_big.py +++ b/gensim/test/test_big.py @@ -38,7 +38,7 @@ def __iter__(self): doc_len = np.random.poisson(self.doc_len) ids = np.random.randint(0, len(self.dictionary), doc_len) if self.words_only: - yield [str(id) for id in ids] + yield [str(idx) for idx in ids] else: weights = np.random.poisson(3, doc_len) yield sorted(zip(ids, weights)) @@ -53,21 +53,21 @@ def testWord2Vec(self): model = gensim.models.Word2Vec(corpus, size=300, workers=4) model.save(testfile(), ignore=['syn1']) del model - model = gensim.models.Word2Vec.load(testfile()) + gensim.models.Word2Vec.load(testfile()) def testLsiModel(self): corpus = BigCorpus(num_docs=50000) model = gensim.models.LsiModel(corpus, num_topics=500, id2word=corpus.dictionary) model.save(testfile()) del model - model = gensim.models.LsiModel.load(testfile()) + gensim.models.LsiModel.load(testfile()) def testLdaModel(self): corpus = BigCorpus(num_docs=5000) model = gensim.models.LdaModel(corpus, num_topics=500, id2word=corpus.dictionary) model.save(testfile()) del model - model = gensim.models.LdaModel.load(testfile()) + gensim.models.LdaModel.load(testfile()) if __name__ == '__main__': diff --git a/gensim/test/test_coherencemodel.py b/gensim/test/test_coherencemodel.py index 039db55a48..229d87d1df 100644 --- a/gensim/test/test_coherencemodel.py +++ b/gensim/test/test_coherencemodel.py @@ -53,32 +53,39 @@ def setUp(self): # `topics1` is clearly better as it has a clear distinction between system-human # interaction and graphs. Hence both the coherence measures for `topics1` should be # greater. 
- self.topics1 = [['human', 'computer', 'system', 'interface'], - ['graph', 'minors', 'trees', 'eps']] - self.topics2 = [['user', 'graph', 'minors', 'system'], - ['time', 'graph', 'survey', 'minors']] + self.topics1 = [ + ['human', 'computer', 'system', 'interface'], + ['graph', 'minors', 'trees', 'eps'] + ] + self.topics2 = [ + ['user', 'graph', 'minors', 'system'], + ['time', 'graph', 'survey', 'minors'] + ] self.ldamodel = LdaModel( corpus=self.corpus, id2word=self.dictionary, num_topics=2, - passes=0, iterations=0) + passes=0, iterations=0 + ) mallet_home = os.environ.get('MALLET_HOME', None) self.mallet_path = os.path.join(mallet_home, 'bin', 'mallet') if mallet_home else None if self.mallet_path: self.malletmodel = LdaMallet( mallet_path=self.mallet_path, corpus=self.corpus, - id2word=self.dictionary, num_topics=2, iterations=0) + id2word=self.dictionary, num_topics=2, iterations=0 + ) vw_path = os.environ.get('VOWPAL_WABBIT_PATH', None) if not vw_path: logging.info( - "Environment variable 'VOWPAL_WABBIT_PATH' not specified," - " skipping sanity checks for LDA Model") + "Environment variable 'VOWPAL_WABBIT_PATH' not specified, skipping sanity checks for LDA Model" + ) self.vw_path = None else: self.vw_path = vw_path self.vwmodel = LdaVowpalWabbit( self.vw_path, corpus=self.corpus, id2word=self.dictionary, - num_topics=2, passes=0) + num_topics=2, passes=0 + ) def check_coherence_measure(self, coherence): """Check provided topic coherence algorithm on given topics""" @@ -179,20 +186,24 @@ def testErrors(self): # not providing dictionary self.assertRaises( ValueError, CoherenceModel, topics=self.topics1, corpus=self.corpus, - coherence='u_mass') + coherence='u_mass' + ) # not providing texts for c_v and instead providing corpus self.assertRaises( ValueError, CoherenceModel, topics=self.topics1, corpus=self.corpus, - dictionary=self.dictionary, coherence='c_v') + dictionary=self.dictionary, coherence='c_v' + ) # not providing corpus or texts for u_mass self.assertRaises( ValueError, CoherenceModel, topics=self.topics1, dictionary=self.dictionary, - coherence='u_mass') + coherence='u_mass' + ) def testPersistence(self): fname = testfile() model = CoherenceModel( - topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') + topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass' + ) model.save(fname) model2 = CoherenceModel.load(fname) self.assertTrue(model.get_coherence() == model2.get_coherence()) @@ -200,7 +211,8 @@ def testPersistence(self): def testPersistenceCompressed(self): fname = testfile() + '.gz' model = CoherenceModel( - topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') + topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass' + ) model.save(fname) model2 = CoherenceModel.load(fname) self.assertTrue(model.get_coherence() == model2.get_coherence()) @@ -208,7 +220,8 @@ def testPersistenceCompressed(self): def testPersistenceAfterProbabilityEstimationUsingCorpus(self): fname = testfile() model = CoherenceModel( - topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') + topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass' + ) model.estimate_probabilities() model.save(fname) model2 = CoherenceModel.load(fname) @@ -218,7 +231,8 @@ def testPersistenceAfterProbabilityEstimationUsingCorpus(self): def testPersistenceAfterProbabilityEstimationUsingTexts(self): fname = testfile() model = 
CoherenceModel( - topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v') + topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v' + ) model.estimate_probabilities() model.save(fname) model2 = CoherenceModel.load(fname) diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index e5a5786613..f6c7d8b43c 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -43,7 +43,8 @@ def setUp(self): ['trees'], ['graph', 'trees'], ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] + ['graph', 'minors', 'survey'] + ] def testDocFreqOneDoc(self): texts = [['human', 'interface', 'computer']] @@ -102,9 +103,10 @@ def testBuild(self): self.assertEqual(sorted(d.dfs.keys()), expected_keys) self.assertEqual(sorted(d.dfs.values()), expected_values) - expected_keys = sorted(['computer', 'eps', 'graph', 'human', - 'interface', 'minors', 'response', 'survey', - 'system', 'time', 'trees', 'user']) + expected_keys = sorted([ + 'computer', 'eps', 'graph', 'human', 'interface', + 'minors', 'response', 'survey', 'system', 'time', 'trees', 'user' + ]) expected_values = list(range(12)) self.assertEqual(sorted(d.token2id.keys()), expected_keys) self.assertEqual(sorted(d.token2id.values()), expected_values) @@ -127,21 +129,21 @@ def testFilterKeepTokens_keepTokens(self): # provide keep_tokens argument, keep the tokens given d = Dictionary(self.texts) d.filter_extremes(no_below=3, no_above=1.0, keep_tokens=['human', 'survey']) - expected = set(['graph', 'trees', 'human', 'system', 'user', 'survey']) + expected = {'graph', 'trees', 'human', 'system', 'user', 'survey'} self.assertEqual(set(d.token2id.keys()), expected) def testFilterKeepTokens_unchangedFunctionality(self): # do not provide keep_tokens argument, filter_extremes functionality is unchanged d = Dictionary(self.texts) d.filter_extremes(no_below=3, no_above=1.0) - expected = set(['graph', 'trees', 'system', 'user']) + expected = {'graph', 'trees', 'system', 'user'} self.assertEqual(set(d.token2id.keys()), expected) def testFilterKeepTokens_unseenToken(self): # do provide keep_tokens argument with unseen tokens, filter_extremes functionality is unchanged d = Dictionary(self.texts) d.filter_extremes(no_below=3, no_above=1.0, keep_tokens=['unknown_token']) - expected = set(['graph', 'trees', 'system', 'user']) + expected = {'graph', 'trees', 'system', 'user'} self.assertEqual(set(d.token2id.keys()), expected) def testFilterMostFrequent(self): @@ -160,7 +162,8 @@ def testFilterTokens(self): expected = { 'computer': 0, 'eps': 8, 'graph': 10, 'human': 1, 'interface': 2, 'minors': 11, 'response': 3, 'survey': 4, - 'system': 5, 'time': 6, 'trees': 9, 'user': 7} + 'system': 5, 'time': 6, 'trees': 9, 'user': 7 + } del expected[removed_word] self.assertEqual(sorted(d.token2id.keys()), sorted(expected.keys())) @@ -186,7 +189,8 @@ def test_saveAsText(self): small_text = [ ["prvé", "slovo"], ["slovo", "druhé"], - ["druhé", "slovo"]] + ["druhé", "slovo"] + ] d = Dictionary(small_text) @@ -264,7 +268,8 @@ def test_from_corpus(self): "The generation of random binary unordered trees", "The intersection graph of paths in trees", "Graph minors IV Widths of trees and well quasi ordering", - "Graph minors A survey"] + "Graph minors A survey" + ] stoplist = set('for a of the and to in'.split()) texts = [ [word for word in document.lower().split() if word not in stoplist] diff --git a/gensim/test/test_corpora_hashdictionary.py 
b/gensim/test/test_corpora_hashdictionary.py index 6f314d65ee..808246dc59 100644 --- a/gensim/test/test_corpora_hashdictionary.py +++ b/gensim/test/test_corpora_hashdictionary.py @@ -36,7 +36,8 @@ def setUp(self): ['trees'], ['graph', 'trees'], ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] + ['graph', 'minors', 'survey'] + ] def testDocFreqOneDoc(self): texts = [['human', 'interface', 'computer']] @@ -92,7 +93,7 @@ def testDebugMode(self): # two words texts = [['human', 'cat']] d = HashDictionary(texts, debug=True, myhash=zlib.adler32) - expected = {9273: set(['cat']), 31002: set(['human'])} + expected = {9273: {'cat'}, 31002: {'human'}} self.assertEqual(d.id2token, expected) # now the same thing, with debug off @@ -105,8 +106,17 @@ def testRange(self): # all words map to the same id d = HashDictionary(self.texts, id_range=1, debug=True) dfs = {0: 9} - id2token = {0: set(['minors', 'graph', 'system', 'trees', 'eps', 'computer', 'survey', 'user', 'human', 'time', 'interface', 'response'])} - token2id = {'minors': 0, 'graph': 0, 'system': 0, 'trees': 0, 'eps': 0, 'computer': 0, 'survey': 0, 'user': 0, 'human': 0, 'time': 0, 'interface': 0, 'response': 0} + id2token = { + 0: { + 'minors', 'graph', 'system', 'trees', 'eps', 'computer', + 'survey', 'user', 'human', 'time', 'interface', 'response' + } + } + token2id = { + 'minors': 0, 'graph': 0, 'system': 0, 'trees': 0, + 'eps': 0, 'computer': 0, 'survey': 0, 'user': 0, + 'human': 0, 'time': 0, 'interface': 0, 'response': 0 + } self.assertEqual(d.dfs, dfs) self.assertEqual(d.id2token, id2token) self.assertEqual(d.token2id, token2id) @@ -114,29 +124,31 @@ def testRange(self): # 2 ids: 0/1 for even/odd number of bytes in the word d = HashDictionary(self.texts, id_range=2, myhash=lambda key: len(key)) dfs = {0: 7, 1: 7} - id2token = {0: set(['minors', 'system', 'computer', 'survey', 'user', 'time', 'response']), 1: set(['interface', 'graph', 'trees', 'eps', 'human'])} - token2id = {'minors': 0, 'graph': 1, 'system': 0, 'trees': 1, 'eps': 1, 'computer': 0, 'survey': 0, 'user': 0, 'human': 1, 'time': 0, 'interface': 1, 'response': 0} + id2token = { + 0: {'minors', 'system', 'computer', 'survey', 'user', 'time', 'response'}, + 1: {'interface', 'graph', 'trees', 'eps', 'human'} + } + token2id = { + 'minors': 0, 'graph': 1, 'system': 0, 'trees': 1, 'eps': 1, 'computer': 0, + 'survey': 0, 'user': 0, 'human': 1, 'time': 0, 'interface': 1, 'response': 0 + } self.assertEqual(d.dfs, dfs) self.assertEqual(d.id2token, id2token) self.assertEqual(d.token2id, token2id) def testBuild(self): d = HashDictionary(self.texts, myhash=zlib.adler32) - expected = {5232: 2, - 5798: 3, - 10608: 2, - 12466: 2, - 12736: 3, - 15001: 2, - 18451: 3, - 23844: 3, - 28591: 2, - 29104: 2, - 31002: 2, - 31049: 2} + expected = { + 5232: 2, 5798: 3, 10608: 2, 12466: 2, 12736: 3, 15001: 2, + 18451: 3, 23844: 3, 28591: 2, 29104: 2, 31002: 2, 31049: 2 + } self.assertEqual(d.dfs, expected) - expected = {'minors': 15001, 'graph': 18451, 'system': 5798, 'trees': 23844, 'eps': 31049, 'computer': 10608, 'survey': 28591, 'user': 12736, 'human': 31002, 'time': 29104, 'interface': 12466, 'response': 5232} + expected = { + 'minors': 15001, 'graph': 18451, 'system': 5798, 'trees': 23844, + 'eps': 31049, 'computer': 10608, 'survey': 28591, 'user': 12736, + 'human': 31002, 'time': 29104, 'interface': 12466, 'response': 5232 + } for ex in expected: self.assertEqual(d.token2id[ex], expected[ex]) @@ -149,7 +161,10 @@ def testFilter(self): d = HashDictionary(self.texts, 
myhash=zlib.adler32) d.filter_extremes(no_below=0, no_above=0.3) - expected = {29104: 2, 31049: 2, 28591: 2, 5232: 2, 10608: 2, 12466: 2, 15001: 2, 31002: 2} + expected = { + 29104: 2, 31049: 2, 28591: 2, 5232: 2, + 10608: 2, 12466: 2, 15001: 2, 31002: 2 + } self.assertEqual(d.dfs, expected) d = HashDictionary(self.texts, myhash=zlib.adler32) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index ad4eb4c976..9a66c50b0a 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -229,20 +229,26 @@ def test_dbow_hs(self): def test_dmm_hs(self): """Test DM/mean doc2vec training.""" - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=1, negative=0, - alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_mean=1, size=24, window=4, + hs=1, negative=0, alpha=0.05, min_count=2, iter=20 + ) self.model_sanity(model) def test_dms_hs(self): """Test DM/sum doc2vec training.""" - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=0, size=24, window=4, hs=1, negative=0, - alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_mean=0, size=24, window=4, hs=1, + negative=0, alpha=0.05, min_count=2, iter=20 + ) self.model_sanity(model) def test_dmc_hs(self): """Test DM/concatenate doc2vec training.""" - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_concat=1, size=24, window=4, hs=1, negative=0, - alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_concat=1, size=24, window=4, + hs=1, negative=0, alpha=0.05, min_count=2, iter=20 + ) self.model_sanity(model) def test_dbow_neg(self): @@ -252,20 +258,26 @@ def test_dbow_neg(self): def test_dmm_neg(self): """Test DM/mean doc2vec training.""" - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=0, negative=10, - alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=0, + negative=10, alpha=0.05, min_count=2, iter=20 + ) self.model_sanity(model) def test_dms_neg(self): """Test DM/sum doc2vec training.""" - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=0, size=24, window=4, hs=0, negative=10, - alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_mean=0, size=24, window=4, hs=0, + negative=10, alpha=0.05, min_count=2, iter=20 + ) self.model_sanity(model) def test_dmc_neg(self): """Test DM/concatenate doc2vec training.""" - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_concat=1, size=24, window=4, hs=0, negative=10, - alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_concat=1, size=24, window=4, hs=0, + negative=10, alpha=0.05, min_count=2, iter=20 + ) self.model_sanity(model) def test_parallel(self): @@ -296,10 +308,14 @@ def test_deterministic_neg(self): def test_deterministic_dmc(self): """Test doc2vec results identical with identical RNG seed.""" # bigger, dmc - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, - seed=42, workers=1) - model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, - seed=42, workers=1) + model = doc2vec.Doc2Vec( + DocsLeeCorpus(), dm=1, dm_concat=1, size=24, + window=4, hs=1, negative=3, seed=42, workers=1 + ) + model2 = doc2vec.Doc2Vec( + DocsLeeCorpus(), dm=1, dm_concat=1, size=24, + window=4, hs=1, negative=3, seed=42, workers=1 + ) self.models_equal(model, model2) def test_mixed_tag_types(self): @@ -341,12 +357,18 @@ def 
test_delete_temporary_training_data(self): self.assertTrue(not hasattr(model, 'syn0_lockf')) self.assertTrue(model.docvecs and not hasattr(model.docvecs, 'doctag_syn0')) self.assertTrue(model.docvecs and not hasattr(model.docvecs, 'doctag_syn0_lockf')) - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=1, negative=0, alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=1, + negative=0, alpha=0.05, min_count=2, iter=20 + ) model.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True) self.assertTrue(model.docvecs and hasattr(model.docvecs, 'doctag_syn0')) self.assertTrue(hasattr(model, 'syn1')) self.model_sanity(model, keep_training=False) - model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=0, negative=1, alpha=0.05, min_count=2, iter=20) + model = doc2vec.Doc2Vec( + list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=0, + negative=1, alpha=0.05, min_count=2, iter=20 + ) model.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True) self.model_sanity(model, keep_training=False) self.assertTrue(hasattr(model, 'syn1neg')) @@ -436,11 +458,13 @@ def read_su_sentiment_rotten_tomatoes(dirname, lowercase=True): http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip has been expanded. It's not too big, so compose entirely into memory. """ - logging.info("loading corpus from %s" % dirname) + logging.info("loading corpus from %s", dirname) # many mangled chars in sentences (datasetSentences.txt) - chars_sst_mangled = ['à', 'á', 'â', 'ã', 'æ', 'ç', 'è', 'é', 'í', - 'í', 'ï', 'ñ', 'ó', 'ô', 'ö', 'û', 'ü'] + chars_sst_mangled = [ + 'à', 'á', 'â', 'ã', 'æ', 'ç', 'è', 'é', 'í', + 'í', 'ï', 'ñ', 'ó', 'ô', 'ö', 'û', 'ü' + ] sentence_fixups = [(char.encode('utf-8').decode('latin1'), char) for char in chars_sst_mangled] # more junk, and the replace necessary for sentence-phrase consistency sentence_fixups.extend([ @@ -502,8 +526,10 @@ def read_su_sentiment_rotten_tomatoes(dirname, lowercase=True): assert len([phrase for phrase in phrases if phrase.split == 'test']) == 2210 # 'test' assert len([phrase for phrase in phrases if phrase.split == 'dev']) == 1100 # 'dev' - logging.info("loaded corpus with %i sentences and %i phrases from %s", - len(info_by_sentence), len(phrases), dirname) + logging.info( + "loaded corpus with %i sentences and %i phrases from %s", + len(info_by_sentence), len(phrases), dirname + ) return phrases diff --git a/gensim/test/test_dtm.py b/gensim/test/test_dtm.py index 81b48374ab..231bbb1932 100644 --- a/gensim/test/test_dtm.py +++ b/gensim/test/test_dtm.py @@ -39,7 +39,8 @@ def testDtm(self): model = gensim.models.wrappers.DtmModel( self.dtm_path, self.corpus, self.time_slices, num_topics=2, id2word=self.id2word, model='dtm', initialize_lda=True, - rng_seed=1) + rng_seed=1 + ) topics = model.show_topics(num_topics=2, times=2, num_words=10) self.assertEqual(len(topics), 4) @@ -52,7 +53,8 @@ def testDim(self): model = gensim.models.wrappers.DtmModel( self.dtm_path, self.corpus, self.time_slices, num_topics=2, id2word=self.id2word, model='fixed', initialize_lda=True, - rng_seed=1) + rng_seed=1 + ) topics = model.show_topics(num_topics=2, times=2, num_words=10) self.assertEqual(len(topics), 4) @@ -67,7 +69,8 @@ def testCalledProcessError(self): gensim.models.wrappers.DtmModel( self.dtm_path, self.corpus, self.time_slices, num_topics=2, id2word=self.id2word, model='dtm', initialize_lda=False, - rng_seed=1) + rng_seed=1 + ) if 
__name__ == '__main__': diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py index 6a7a7b09ed..3c00a8f35a 100644 --- a/gensim/test/test_fasttext_wrapper.py +++ b/gensim/test/test_fasttext_wrapper.py @@ -56,7 +56,8 @@ def testTraining(self): return # Use self.skipTest once python < 2.7 is no longer supported vocab_size, model_size = 1763, 10 trained_model = fasttext.FastText.train( - self.ft_path, self.corpus_file, size=model_size, output_file=testfile()) + self.ft_path, self.corpus_file, size=model_size, output_file=testfile() + ) self.assertEqual(trained_model.wv.syn0.shape, (vocab_size, model_size)) self.assertEqual(len(trained_model.wv.vocab), vocab_size) @@ -72,11 +73,13 @@ def testMinCount(self): logger.info("FT_HOME env variable not set, skipping test") return # Use self.skipTest once python < 2.7 is no longer supported test_model_min_count_5 = fasttext.FastText.train( - self.ft_path, self.corpus_file, output_file=testfile(), size=10, min_count=5) + self.ft_path, self.corpus_file, output_file=testfile(), size=10, min_count=5 + ) self.assertTrue('forests' not in test_model_min_count_5.wv.vocab) test_model_min_count_1 = fasttext.FastText.train( - self.ft_path, self.corpus_file, output_file=testfile(), size=10, min_count=1) + self.ft_path, self.corpus_file, output_file=testfile(), size=10, min_count=1 + ) self.assertTrue('forests' in test_model_min_count_1.wv.vocab) def testModelSize(self): @@ -85,7 +88,8 @@ def testModelSize(self): logger.info("FT_HOME env variable not set, skipping test") return # Use self.skipTest once python < 2.7 is no longer supported test_model_size_20 = fasttext.FastText.train( - self.ft_path, self.corpus_file, output_file=testfile(), size=20) + self.ft_path, self.corpus_file, output_file=testfile(), size=20 + ) self.assertEqual(test_model_size_20.vector_size, 20) self.assertEqual(test_model_size_20.wv.syn0.shape[1], 20) self.assertEqual(test_model_size_20.wv.syn0_all.shape[1], 20) @@ -245,8 +249,10 @@ def testNSimilarity(self): self.assertEqual(self.test_model.n_similarity(['the'], ['and']), self.test_model.n_similarity(['and'], ['the'])) # Out of vocab check self.assertTrue(numpy.allclose(self.test_model.n_similarity(['night', 'nights'], ['nights', 'night']), 1.0)) - self.assertEqual(self.test_model.n_similarity(['night'], ['nights']), - self.test_model.n_similarity(['nights'], ['night'])) + self.assertEqual( + self.test_model.n_similarity(['night'], ['nights']), + self.test_model.n_similarity(['nights'], ['night']) + ) def testSimilarity(self): """Test similarity for in-vocab and out-of-vocab words""" diff --git a/gensim/test/test_glove2word2vec.py b/gensim/test/test_glove2word2vec.py index b638ca927d..2226bf9fd2 100644 --- a/gensim/test/test_glove2word2vec.py +++ b/gensim/test/test_glove2word2vec.py @@ -31,7 +31,10 @@ def setUp(self): self.output_file = testfile() def testConversion(self): - output = check_output(args=['python', '-m', 'gensim.scripts.glove2word2vec', '--input', self.datapath, '--output', self.output_file]) # noqa:F841 + check_output(args=[ + 'python', '-m', 'gensim.scripts.glove2word2vec', + '--input', self.datapath, '--output', self.output_file + ]) # test that the converted model loads successfully try: self.test_model = gensim.models.KeyedVectors.load_word2vec_format(self.output_file) @@ -40,7 +43,9 @@ def testConversion(self): if os.path.isfile(os.path.join(self.output_file)): self.fail('model file %s was created but could not be loaded.' 
% self.output_file) else: - self.fail('model file %s creation failed, check the parameters and input file format.' % self.output_file) + self.fail( + 'model file %s creation failed, check the parameters and input file format.' % self.output_file + ) if __name__ == '__main__': diff --git a/gensim/test/test_hdpmodel.py b/gensim/test/test_hdpmodel.py index 647b31ad7e..b3cf8bdde1 100644 --- a/gensim/test/test_hdpmodel.py +++ b/gensim/test/test_hdpmodel.py @@ -17,7 +17,7 @@ from gensim.corpora import mmcorpus, Dictionary from gensim.models import hdpmodel -from gensim.test import basetests +from gensim.test import basetmtests import numpy as np @@ -26,15 +26,17 @@ # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] @@ -44,7 +46,7 @@ def testfile(): return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') -class TestHdpModel(unittest.TestCase, basetests.TestBaseTopicModel): +class TestHdpModel(unittest.TestCase, basetmtests.TestBaseTopicModel): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) self.class_ = hdpmodel.HdpModel diff --git a/gensim/test/test_keras_integration.py b/gensim/test/test_keras_integration.py index 7f321c6565..996f1b7f7b 100644 --- a/gensim/test/test_keras_integration.py +++ b/gensim/test/test_keras_integration.py @@ -79,7 +79,10 @@ def testEmbeddingLayerCosineSim(self): word_a = 'graph' word_b = 'trees' - output = model.predict([np.asarray([keras_w2v_model.wv.vocab[word_a].index]), np.asarray([keras_w2v_model.wv.vocab[word_b].index])]) + output = model.predict([ + np.asarray([keras_w2v_model.wv.vocab[word_a].index]), + np.asarray([keras_w2v_model.wv.vocab[word_b].index]) + ]) # output is the cosine distance between the two words (as a similarity measure) self.assertTrue(type(output[0][0][0]) == np.float32) # verify that a float is returned @@ -113,7 +116,7 @@ def testEmbeddingLayer20NewsGroup(self): texts_w2v.append(sentence.split(' ')) labels.append(label_id) except Exception: - None + pass # Vectorize the text samples into a 2D integer tensor tokenizer = Tokenizer() @@ -128,11 +131,11 @@ def testEmbeddingLayer20NewsGroup(self): y_train = labels # prepare the embedding layer using the wrapper - Keras_w2v = self.model_twenty_ng - Keras_w2v.build_vocab(texts_w2v) - Keras_w2v.train(texts, total_examples=Keras_w2v.corpus_count, epochs=Keras_w2v.iter) - Keras_w2v_wv = Keras_w2v.wv - embedding_layer = Keras_w2v_wv.get_embedding_layer() + keras_w2v = self.model_twenty_ng + keras_w2v.build_vocab(texts_w2v) + keras_w2v.train(texts, total_examples=keras_w2v.corpus_count, epochs=keras_w2v.iter) + keras_w2v_wv = keras_w2v.wv + embedding_layer = keras_w2v_wv.get_embedding_layer() # create a 1D convnet to solve our classification task sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32') diff --git 
a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index 8a3be3af5c..76bd448d5c 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -35,7 +35,7 @@ def test_text_keywords(self): with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.kw.txt"), mode="r") as f: kw = f.read().strip().split("\n") - self.assertEqual(set(map(str, generated_keywords)), set(map(str, kw))) + self.assertEqual({str(x) for x in generated_keywords}, {str(x) for x in kw}) def test_text_keywords_words(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') @@ -55,13 +55,13 @@ def test_text_keywords_pos(self): text = f.read() # calculate keywords using only certain parts of speech - generated_keywords_NNVBJJ = keywords(text, pos_filter=['NN', 'VB', 'JJ'], ratio=0.3, split=True) + generated_keywords_nnvbjj = keywords(text, pos_filter=['NN', 'VB', 'JJ'], ratio=0.3, split=True) # To be compared to the reference. with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.kwpos.txt"), mode="r") as f: kw = f.read().strip().split("\n") - self.assertEqual(set(map(str, generated_keywords_NNVBJJ)), set(map(str, kw))) + self.assertEqual({str(x) for x in generated_keywords_nnvbjj}, {str(x) for x in kw}) def test_text_summarization_raises_exception_on_short_input_text(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') diff --git a/gensim/test/test_ldamallet_wrapper.py b/gensim/test/test_ldamallet_wrapper.py index a7d64839bb..5ed4486e16 100644 --- a/gensim/test/test_ldamallet_wrapper.py +++ b/gensim/test/test_ldamallet_wrapper.py @@ -21,21 +21,23 @@ from gensim.models.wrappers import ldamallet from gensim import matutils from gensim.models import ldamodel -from gensim.test import basetests +from gensim.test import basetmtests module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] @@ -46,7 +48,7 @@ def testfile(): return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') -class TestLdaMallet(unittest.TestCase, basetests.TestBaseTopicModel): +class TestLdaMallet(unittest.TestCase, basetmtests.TestBaseTopicModel): def setUp(self): mallet_home = os.environ.get('MALLET_HOME', None) self.mallet_path = os.path.join(mallet_home, 'bin', 'mallet') if mallet_home else None @@ -72,8 +74,10 @@ def testTransform(self): passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering if passed: break - logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) + logging.warning( + "LDA failed to converge on attempt %i (got %s, expected %s)", + i, 
sorted(vec), sorted(expected) + ) self.assertTrue(passed) def testSparseTransform(self): @@ -82,7 +86,9 @@ def testSparseTransform(self): passed = False for i in range(5): # restart at most 5 times # create the sparse transformation model with the appropriate topic_threshold - model = ldamallet.LdaMallet(self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=200, topic_threshold=0.5) + model = ldamallet.LdaMallet( + self.mallet_path, corpus, id2word=dictionary, num_topics=2, iterations=200, topic_threshold=0.5 + ) # transform one document doc = list(corpus)[0] transformed = model[doc] @@ -91,8 +97,10 @@ def testSparseTransform(self): passed = np.allclose(sorted(vec), sorted(expected), atol=1e-2) # must contain the same values, up to re-ordering if passed: break - logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) + logging.warning( + "LDA failed to converge on attempt %i (got %s, expected %s)", + i, sorted(vec), sorted(expected) + ) self.assertTrue(passed) def testMallet2Model(self): diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py index 71cc10f70c..c1d35c2661 100644 --- a/gensim/test/test_ldamodel.py +++ b/gensim/test/test_ldamodel.py @@ -22,22 +22,24 @@ from gensim.corpora import mmcorpus, Dictionary from gensim.models import ldamodel, ldamulticore from gensim import matutils, utils -from gensim.test import basetests +from gensim.test import basetmtests module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] @@ -54,7 +56,7 @@ def testRandomState(): assert(isinstance(utils.get_random_state(testcase), np.random.RandomState)) -class TestLdaModel(unittest.TestCase, basetests.TestBaseTopicModel): +class TestLdaModel(unittest.TestCase, basetmtests.TestBaseTopicModel): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) self.class_ = ldamodel.LdaModel @@ -79,8 +81,7 @@ def testTransform(self): passed = np.allclose(sorted(vec), sorted(expected), atol=1e-1) # must contain the same values, up to re-ordering if passed: break - logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)" % - (i, sorted(vec), sorted(expected))) + logging.warning("LDA failed to converge on attempt %i (got %s, expected %s)", i, sorted(vec), sorted(expected)) self.assertTrue(passed) def testAlphaAuto(self): @@ -231,7 +232,9 @@ def testGetTopicTerms(self): def testGetDocumentTopics(self): - model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100, random_state=np.random.seed(0)) + model = self.class_( + self.corpus, id2word=dictionary, num_topics=2, 
passes=100, random_state=np.random.seed(0) + ) doc_topics = model.get_document_topics(self.corpus) @@ -264,7 +267,9 @@ def testGetDocumentTopics(self): doc_topic_count_na = 0 word_phi_count_na = 0 - all_topics = model.get_document_topics(self.corpus, minimum_probability=0.8, minimum_phi_value=1.0, per_word_topics=True) + all_topics = model.get_document_topics( + self.corpus, minimum_probability=0.8, minimum_phi_value=1.0, per_word_topics=True + ) self.assertEqual(model.state.numdocs, len(corpus)) @@ -313,7 +318,9 @@ def testGetDocumentTopics(self): def testTermTopics(self): - model = self.class_(self.corpus, id2word=dictionary, num_topics=2, passes=100, random_state=np.random.seed(0)) + model = self.class_( + self.corpus, id2word=dictionary, num_topics=2, passes=100, random_state=np.random.seed(0) + ) # check with word_type result = model.get_term_topics(2) @@ -355,7 +362,7 @@ def testPasses(self): for test_rhot in test_rhots: model.update(self.corpus) - msg = ", ".join(map(str, [passes, model.num_updates, model.state.numdocs])) + msg = ", ".join(str(x) for x in [passes, model.num_updates, model.state.numdocs]) self.assertAlmostEqual(final_rhot(), test_rhot, msg=msg) self.assertEqual(model.state.numdocs, len(corpus) * len(test_rhots)) @@ -379,7 +386,7 @@ def testPasses(self): # model = self.class_(id2word=dictionary, num_topics=2, passes=200, eta=eta) # model.update(self.corpus) - # topics = [dict((word, p) for p, word in model.show_topic(j, topn=None)) for j in range(2)] + # topics = [{word: p for p, word in model.show_topic(j, topn=None)} for j in range(2)] # # check that the word 'system' in the topic we seeded got a high weight, # # and the word 'trees' (the main word in the other topic) a low weight -- @@ -413,8 +420,8 @@ def testModelCompatibilityWithPythonVersions(self): self.assertTrue(np.allclose(model_2_7.expElogbeta, model_3_5.expElogbeta)) tstvec = [] self.assertTrue(np.allclose(model_2_7[tstvec], model_3_5[tstvec])) # try projecting an empty vector - id2word_2_7 = dict((k, v) for k, v in model_2_7.id2word.iteritems()) - id2word_3_5 = dict((k, v) for k, v in model_3_5.id2word.iteritems()) + id2word_2_7 = dict(model_2_7.id2word.iteritems()) + id2word_3_5 = dict(model_3_5.id2word.iteritems()) self.assertEqual(set(id2word_2_7.keys()), set(id2word_3_5.keys())) def testPersistenceIgnore(self): diff --git a/gensim/test/test_ldaseqmodel.py b/gensim/test/test_ldaseqmodel.py index 4ca7b104fc..d38c01868c 100644 --- a/gensim/test/test_ldaseqmodel.py +++ b/gensim/test/test_ldaseqmodel.py @@ -20,25 +20,175 @@ class TestLdaSeq(unittest.TestCase): # we are setting up a DTM model and fitting it, and checking topic-word and doc-topic results. 
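Several of these test modules now import the renamed `basetmtests` module and mix `TestBaseTopicModel` into their `unittest.TestCase` subclasses. A minimal sketch of that mixin pattern, with purely illustrative class names rather than gensim's own:

import unittest

class SharedTopicModelChecks(object):
    """Reusable checks; the concrete TestCase is expected to set self.model in setUp()."""

    def test_show_topics(self):
        topics = self.model.show_topics(formatted=False)
        self.assertTrue(len(topics) > 0)

class FakeTopicModel(object):
    def show_topics(self, formatted=False):
        # Return one (topic_id, [(word, weight), ...]) pair, enough for the shared check.
        return [(0, [("graph", 0.5), ("trees", 0.5)])]

class TestFakeTopicModel(unittest.TestCase, SharedTopicModelChecks):
    def setUp(self):
        self.model = FakeTopicModel()

if __name__ == '__main__':
    unittest.main()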
def setUp(self): texts = [ - [u'senior', u'studios', u'studios', u'studios', u'creators', u'award', u'mobile', u'currently', u'challenges', u'senior', u'summary', u'senior', u'motivated', u'creative', u'senior'], - [u'performs', u'engineering', u'tasks', u'infrastructure', u'focusing', u'primarily', u'programming', u'interaction', u'designers', u'engineers', u'leadership', u'teams', u'teams', u'crews', u'responsibilities', u'engineering', u'quality', u'functional', u'functional', u'teams', u'organizing', u'prioritizing', u'technical', u'decisions', u'engineering', u'participates', u'participates', u'reviews', u'participates', u'hiring', u'conducting', u'interviews'], - [u'feedback', u'departments', u'define', u'focusing', u'engineering', u'teams', u'crews', u'facilitate', u'engineering', u'departments', u'deadlines', u'milestones', u'typically', u'spends', u'designing', u'developing', u'updating', u'bugs', u'mentoring', u'engineers', u'define', u'schedules', u'milestones', u'participating'], - [u'reviews', u'interviews', u'sized', u'teams', u'interacts', u'disciplines', u'knowledge', u'skills', u'knowledge', u'knowledge', u'xcode', u'scripting', u'debugging', u'skills', u'skills', u'knowledge', u'disciplines', u'animation', u'networking', u'expertise', u'competencies', u'oral', u'skills', u'management', u'skills', u'proven', u'effectively', u'teams', u'deadline', u'environment', u'bachelor', u'minimum', u'shipped', u'leadership', u'teams', u'location', u'resumes', u'jobs', u'candidates', u'openings', u'jobs'], - [u'maryland', u'client', u'producers', u'electricity', u'operates', u'storage', u'utility', u'retail', u'customers', u'engineering', u'consultant', u'maryland', u'summary', u'technical', u'technology', u'departments', u'expertise', u'maximizing', u'output', u'reduces', u'operating', u'participates', u'areas', u'engineering', u'conducts', u'testing', u'solve', u'supports', u'environmental', u'understands', u'objectives', u'operates', u'responsibilities', u'handles', u'complex', u'engineering', u'aspects', u'monitors', u'quality', u'proficiency', u'optimization', u'recommendations', u'supports', u'personnel', u'troubleshooting', u'commissioning', u'startup', u'shutdown', u'supports', u'procedure', u'operating', u'units', u'develops', u'simulations', u'troubleshooting', u'tests', u'enhancing', u'solving', u'develops', u'estimates', u'schedules', u'scopes', u'understands', u'technical', u'management', u'utilize', u'routine', u'conducts', u'hazards', u'utilizing', u'hazard', u'operability', u'methodologies', u'participates', u'startup', u'reviews', u'pssr', u'participate', u'teams', u'participate', u'regulatory', u'audits', u'define', u'scopes', u'budgets', u'schedules', u'technical', u'management', u'environmental', u'awareness', u'interfacing', u'personnel', u'interacts', u'regulatory', u'departments', u'input', u'objectives', u'identifying', u'introducing', u'concepts', u'solutions', u'peers', u'customers', u'coworkers', u'knowledge', u'skills', u'engineering', u'quality', u'engineering'], - [u'commissioning', u'startup', u'knowledge', u'simulators', u'technologies', u'knowledge', u'engineering', u'techniques', u'disciplines', u'leadership', u'skills', u'proven', u'engineers', u'oral', u'skills', u'technical', u'skills', u'analytically', u'solve', u'complex', u'interpret', u'proficiency', u'simulation', u'knowledge', u'applications', u'manipulate', u'applications', u'engineering'], - [u'calculations', u'programs', u'matlab', u'excel', u'independently', u'environment', u'proven', 
u'skills', u'effectively', u'multiple', u'tasks', u'planning', u'organizational', u'management', u'skills', u'rigzone', u'jobs', u'developer', u'exceptional', u'strategies', u'junction', u'exceptional', u'strategies', u'solutions', u'solutions', u'biggest', u'insurers', u'operates', u'investment'], - [u'vegas', u'tasks', u'electrical', u'contracting', u'expertise', u'virtually', u'electrical', u'developments', u'institutional', u'utilities', u'technical', u'experts', u'relationships', u'credibility', u'contractors', u'utility', u'customers', u'customer', u'relationships', u'consistently', u'innovations', u'profile', u'construct', u'envision', u'dynamic', u'complex', u'electrical', u'management', u'grad', u'internship', u'electrical', u'engineering', u'infrastructures', u'engineers', u'documented', u'management', u'engineering', u'quality', u'engineering', u'electrical', u'engineers', u'complex', u'distribution', u'grounding', u'estimation', u'testing', u'procedures', u'voltage', u'engineering'], - [u'troubleshooting', u'installation', u'documentation', u'bsee', u'certification', u'electrical', u'voltage', u'cabling', u'electrical', u'engineering', u'candidates', u'electrical', u'internships', u'oral', u'skills', u'organizational', u'prioritization', u'skills', u'skills', u'excel', u'cadd', u'calculation', u'autocad', u'mathcad', u'skills', u'skills', u'customer', u'relationships', u'solving', u'ethic', u'motivation', u'tasks', u'budget', u'affirmative', u'diversity', u'workforce', u'gender', u'orientation', u'disability', u'disabled', u'veteran', u'vietnam', u'veteran', u'qualifying', u'veteran', u'diverse', u'candidates', u'respond', u'developing', u'workplace', u'reflects', u'diversity', u'communities', u'reviews', u'electrical', u'contracting', u'southwest', u'electrical', u'contractors'], - [u'intern', u'electrical', u'engineering', u'idexx', u'laboratories', u'validating', u'idexx', u'integrated', u'hardware', u'entails', u'planning', u'debug', u'validation', u'engineers', u'validation', u'methodologies', u'healthcare', u'platforms', u'brightest', u'solve', u'challenges', u'innovation', u'technology', u'idexx', u'intern', u'idexx', u'interns', u'supplement', u'interns', u'teams', u'roles', u'competitive', u'interns', u'idexx', u'interns', u'participate', u'internships', u'mentors', u'seminars', u'topics', u'leadership', u'workshops', u'relevant', u'planning', u'topics', u'intern', u'presentations', u'mixers', u'applicants', u'ineligible', u'laboratory', u'compliant', u'idexx', u'laboratories', u'healthcare', u'innovation', u'practicing', u'veterinarians', u'diagnostic', u'technology', u'idexx', u'enhance', u'veterinarians', u'efficiency', u'economically', u'idexx', u'worldwide', u'diagnostic', u'tests', u'tests', u'quality', u'headquartered', u'idexx', u'laboratories', u'employs', u'customers', u'qualifications', u'applicants', u'idexx', u'interns', u'potential', u'demonstrated', u'portfolio', u'recommendation', u'resumes', u'marketing', u'location', u'americas', u'verification', u'validation', u'schedule', u'overtime', u'idexx', u'laboratories', u'reviews', u'idexx', u'laboratories', u'nasdaq', u'healthcare', u'innovation', u'practicing', u'veterinarians'], - [u'location', u'duration', u'temp', u'verification', u'validation', u'tester', u'verification', u'validation', u'middleware', u'specifically', u'testing', u'applications', u'clinical', u'laboratory', u'regulated', u'environment', u'responsibilities', u'complex', u'hardware', u'testing', u'clinical', u'analyzers', u'laboratory', 
u'graphical', u'interfaces', u'complex', u'sample', u'sequencing', u'protocols', u'developers', u'correction', u'tracking', u'tool', u'timely', u'troubleshoot', u'testing', u'functional', u'manual', u'automated', u'participate', u'ongoing'], - [u'testing', u'coverage', u'planning', u'documentation', u'testing', u'validation', u'corrections', u'monitor', u'implementation', u'recurrence', u'operating', u'statistical', u'quality', u'testing', u'global', u'multi', u'teams', u'travel', u'skills', u'concepts', u'waterfall', u'agile', u'methodologies', u'debugging', u'skills', u'complex', u'automated', u'instrumentation', u'environment', u'hardware', u'mechanical', u'components', u'tracking', u'lifecycle', u'management', u'quality', u'organize', u'define', u'priorities', u'organize', u'supervision', u'aggressive', u'deadlines', u'ambiguity', u'analyze', u'complex', u'situations', u'concepts', u'technologies', u'verbal', u'skills', u'effectively', u'technical', u'clinical', u'diverse', u'strategy', u'clinical', u'chemistry', u'analyzer', u'laboratory', u'middleware', u'basic', u'automated', u'testing', u'biomedical', u'engineering', u'technologists', u'laboratory', u'technology', u'availability', u'click', u'attach'], - [u'scientist', u'linux', u'asrc', u'scientist', u'linux', u'asrc', u'technology', u'solutions', u'subsidiary', u'asrc', u'engineering', u'technology', u'contracts'], - [u'multiple', u'agencies', u'scientists', u'engineers', u'management', u'personnel', u'allows', u'solutions', u'complex', u'aeronautics', u'aviation', u'management', u'aviation', u'engineering', u'hughes', u'technical', u'technical', u'aviation', u'evaluation', u'engineering', u'management', u'technical', u'terminal', u'surveillance', u'programs', u'currently', u'scientist', u'travel', u'responsibilities', u'develops', u'technology', u'modifies', u'technical', u'complex', u'reviews', u'draft', u'conformity', u'completeness', u'testing', u'interface', u'hardware', u'regression', u'impact', u'reliability', u'maintainability', u'factors', u'standardization', u'skills', u'travel', u'programming', u'linux', u'environment', u'cisco', u'knowledge', u'terminal', u'environment', u'clearance', u'clearance', u'input', u'output', u'digital', u'automatic', u'terminal', u'management', u'controller', u'termination', u'testing', u'evaluating', u'policies', u'procedure', u'interface', u'installation', u'verification', u'certification', u'core', u'avionic', u'programs', u'knowledge', u'procedural', u'testing', u'interfacing', u'hardware', u'regression', u'impact', u'reliability', u'maintainability', u'factors', u'standardization', u'missions', u'asrc', u'subsidiaries', u'affirmative', u'employers', u'applicants', u'disability', u'veteran', u'technology', u'location', u'airport', u'bachelor', u'schedule', u'travel', u'contributor', u'management', u'asrc', u'reviews'], - [u'technical', u'solarcity', u'niche', u'vegas', u'overview', u'resolving', u'customer', u'clients', u'expanding', u'engineers', u'developers', u'responsibilities', u'knowledge', u'planning', u'adapt', u'dynamic', u'environment', u'inventive', u'creative', u'solarcity', u'lifecycle', u'responsibilities', u'technical', u'analyzing', u'diagnosing', u'troubleshooting', u'customers', u'ticketing', u'console', u'escalate', u'knowledge', u'engineering', u'timely', u'basic', u'phone', u'functionality', u'customer', u'tracking', u'knowledgebase', u'rotation', u'configure', u'deployment', u'sccm', u'technical', u'deployment', u'deploy', u'hardware', u'solarcity', u'bachelor', 
u'knowledge', u'dell', u'laptops', u'analytical', u'troubleshooting', u'solving', u'skills', u'knowledge', u'databases', u'preferably', u'server', u'preferably', u'monitoring', u'suites', u'documentation', u'procedures', u'knowledge', u'entries', u'verbal', u'skills', u'customer', u'skills', u'competitive', u'solar', u'package', u'insurance', u'vacation', u'savings', u'referral', u'eligibility', u'equity', u'performers', u'solarcity', u'affirmative', u'diversity', u'workplace', u'applicants', u'orientation', u'disability', u'veteran', u'careerrookie'], - [u'embedded', u'exelis', u'junction', u'exelis', u'embedded', u'acquisition', u'networking', u'capabilities', u'classified', u'customer', u'motivated', u'develops', u'tests', u'innovative', u'solutions', u'minimal', u'supervision', u'paced', u'environment', u'enjoys', u'assignments', u'interact', u'multi', u'disciplined', u'challenging', u'focused', u'embedded', u'developments', u'spanning', u'engineering', u'lifecycle', u'specification', u'enhancement', u'applications', u'embedded', u'freescale', u'applications', u'android', u'platforms', u'interface', u'customers', u'developers', u'refine', u'specifications', u'architectures'], - [u'java', u'programming', u'scripts', u'python', u'debug', u'debugging', u'emulators', u'regression', u'revisions', u'specialized', u'setups', u'capabilities', u'subversion', u'technical', u'documentation', u'multiple', u'engineering', u'techexpousa', u'reviews'], - [u'modeler', u'semantic', u'modeling', u'models', u'skills', u'ontology', u'resource', u'framework', u'schema', u'technologies', u'hadoop', u'warehouse', u'oracle', u'relational', u'artifacts', u'models', u'dictionaries', u'models', u'interface', u'specifications', u'documentation', u'harmonization', u'mappings', u'aligned', u'coordinate', u'technical', u'peer', u'reviews', u'stakeholder', u'communities', u'impact', u'domains', u'relationships', u'interdependencies', u'models', u'define', u'analyze', u'legacy', u'models', u'corporate', u'databases', u'architectural', u'alignment', u'customer', u'expertise', u'harmonization', u'modeling', u'modeling', u'consulting', u'stakeholders', u'quality', u'models', u'storage', u'agile', u'specifically', u'focus', u'modeling', u'qualifications', u'bachelors', u'accredited', u'modeler', u'encompass', u'evaluation', u'skills', u'knowledge', u'modeling', u'techniques', u'resource', u'framework', u'schema', u'technologies', u'unified', u'modeling', u'technologies', u'schemas', u'ontologies', u'sybase', u'knowledge', u'skills', u'interpersonal', u'skills', u'customers', u'clearance', u'applicants', u'eligibility', u'classified', u'clearance', u'polygraph', u'techexpousa', u'solutions', u'partnership', u'solutions', u'integration'], - [u'technologies', u'junction', u'develops', u'maintains', u'enhances', u'complex', u'diverse', u'intensive', u'analytics', u'algorithm', u'manipulation', u'management', u'documented', u'individually', u'reviews', u'tests', u'components', u'adherence', u'resolves', u'utilizes', u'methodologies', u'environment', u'input', u'components', u'hardware', u'offs', u'reuse', u'cots', u'gots', u'synthesis', u'components', u'tasks', u'individually', u'analyzes', u'modifies', u'debugs', u'corrects', u'integrates', u'operating', u'environments', u'develops', u'queries', u'databases', u'repositories', u'recommendations', u'improving', u'documentation', u'develops', u'implements', u'algorithms', u'functional', u'assists', u'developing', u'executing', u'procedures', u'components', u'reviews', 
u'documentation', u'solutions', u'analyzing', u'conferring', u'users', u'engineers', u'analyzing', u'investigating', u'areas', u'adapt', u'hardware', u'mathematical', u'models', u'predict', u'outcome', u'implement', u'complex', u'database', u'repository', u'interfaces', u'queries', u'bachelors', u'accredited', u'substituted', u'bachelors', u'firewalls', u'ipsec', u'vpns', u'technology', u'administering', u'servers', u'apache', u'jboss', u'tomcat', u'developing', u'interfaces', u'firefox', u'internet', u'explorer', u'operating', u'mainframe', u'linux', u'solaris', u'virtual', u'scripting', u'programming', u'oriented', u'programming', u'ajax', u'script', u'procedures', u'cobol', u'cognos', u'fusion', u'focus', u'html', u'java', u'java', u'script', u'jquery', u'perl', u'visual', u'basic', u'powershell', u'cots', u'cots', u'oracle', u'apex', u'integration', u'competitive', u'package', u'bonus', u'corporate', u'equity', u'tuition', u'reimbursement', u'referral', u'bonus', u'holidays', u'insurance', u'flexible', u'disability', u'insurance'], + [u'senior', u'studios', u'studios', u'studios', u'creators', u'award', u'mobile', u'currently', + u'challenges', u'senior', u'summary', u'senior', u'motivated', u'creative', u'senior'], + [u'performs', u'engineering', u'tasks', u'infrastructure', u'focusing', u'primarily', + u'programming', u'interaction', u'designers', u'engineers', u'leadership', u'teams', + u'teams', u'crews', u'responsibilities', u'engineering', u'quality', u'functional', + u'functional', u'teams', u'organizing', u'prioritizing', u'technical', u'decisions', + u'engineering', u'participates', u'participates', u'reviews', u'participates', + u'hiring', u'conducting', u'interviews'], + [u'feedback', u'departments', u'define', u'focusing', u'engineering', u'teams', u'crews', + u'facilitate', u'engineering', u'departments', u'deadlines', u'milestones', u'typically', + u'spends', u'designing', u'developing', u'updating', u'bugs', u'mentoring', u'engineers', + u'define', u'schedules', u'milestones', u'participating'], + [u'reviews', u'interviews', u'sized', u'teams', u'interacts', u'disciplines', u'knowledge', + u'skills', u'knowledge', u'knowledge', u'xcode', u'scripting', u'debugging', u'skills', + u'skills', u'knowledge', u'disciplines', u'animation', u'networking', u'expertise', + u'competencies', u'oral', u'skills', u'management', u'skills', u'proven', u'effectively', + u'teams', u'deadline', u'environment', u'bachelor', u'minimum', u'shipped', u'leadership', + u'teams', u'location', u'resumes', u'jobs', u'candidates', u'openings', u'jobs'], + [u'maryland', u'client', u'producers', u'electricity', u'operates', u'storage', u'utility', + u'retail', u'customers', u'engineering', u'consultant', u'maryland', u'summary', u'technical', + u'technology', u'departments', u'expertise', u'maximizing', u'output', u'reduces', u'operating', + u'participates', u'areas', u'engineering', u'conducts', u'testing', u'solve', u'supports', + u'environmental', u'understands', u'objectives', u'operates', u'responsibilities', u'handles', + u'complex', u'engineering', u'aspects', u'monitors', u'quality', u'proficiency', u'optimization', + u'recommendations', u'supports', u'personnel', u'troubleshooting', u'commissioning', u'startup', + u'shutdown', u'supports', u'procedure', u'operating', u'units', u'develops', u'simulations', + u'troubleshooting', u'tests', u'enhancing', u'solving', u'develops', u'estimates', u'schedules', + u'scopes', u'understands', u'technical', u'management', u'utilize', u'routine', 
u'conducts', + u'hazards', u'utilizing', u'hazard', u'operability', u'methodologies', u'participates', u'startup', + u'reviews', u'pssr', u'participate', u'teams', u'participate', u'regulatory', u'audits', u'define', + u'scopes', u'budgets', u'schedules', u'technical', u'management', u'environmental', u'awareness', + u'interfacing', u'personnel', u'interacts', u'regulatory', u'departments', u'input', u'objectives', + u'identifying', u'introducing', u'concepts', u'solutions', u'peers', u'customers', u'coworkers', + u'knowledge', u'skills', u'engineering', u'quality', u'engineering'], + [u'commissioning', u'startup', u'knowledge', u'simulators', u'technologies', u'knowledge', + u'engineering', u'techniques', u'disciplines', u'leadership', u'skills', u'proven', + u'engineers', u'oral', u'skills', u'technical', u'skills', u'analytically', u'solve', + u'complex', u'interpret', u'proficiency', u'simulation', u'knowledge', u'applications', + u'manipulate', u'applications', u'engineering'], + [u'calculations', u'programs', u'matlab', u'excel', u'independently', u'environment', + u'proven', u'skills', u'effectively', u'multiple', u'tasks', u'planning', u'organizational', + u'management', u'skills', u'rigzone', u'jobs', u'developer', u'exceptional', u'strategies', + u'junction', u'exceptional', u'strategies', u'solutions', u'solutions', u'biggest', + u'insurers', u'operates', u'investment'], + [u'vegas', u'tasks', u'electrical', u'contracting', u'expertise', u'virtually', u'electrical', + u'developments', u'institutional', u'utilities', u'technical', u'experts', u'relationships', + u'credibility', u'contractors', u'utility', u'customers', u'customer', u'relationships', + u'consistently', u'innovations', u'profile', u'construct', u'envision', u'dynamic', u'complex', + u'electrical', u'management', u'grad', u'internship', u'electrical', u'engineering', + u'infrastructures', u'engineers', u'documented', u'management', u'engineering', + u'quality', u'engineering', u'electrical', u'engineers', u'complex', u'distribution', + u'grounding', u'estimation', u'testing', u'procedures', u'voltage', u'engineering'], + [u'troubleshooting', u'installation', u'documentation', u'bsee', u'certification', + u'electrical', u'voltage', u'cabling', u'electrical', u'engineering', u'candidates', + u'electrical', u'internships', u'oral', u'skills', u'organizational', u'prioritization', + u'skills', u'skills', u'excel', u'cadd', u'calculation', u'autocad', u'mathcad', + u'skills', u'skills', u'customer', u'relationships', u'solving', u'ethic', u'motivation', + u'tasks', u'budget', u'affirmative', u'diversity', u'workforce', u'gender', u'orientation', + u'disability', u'disabled', u'veteran', u'vietnam', u'veteran', u'qualifying', u'veteran', + u'diverse', u'candidates', u'respond', u'developing', u'workplace', u'reflects', u'diversity', + u'communities', u'reviews', u'electrical', u'contracting', u'southwest', u'electrical', u'contractors'], + [u'intern', u'electrical', u'engineering', u'idexx', u'laboratories', u'validating', u'idexx', + u'integrated', u'hardware', u'entails', u'planning', u'debug', u'validation', u'engineers', + u'validation', u'methodologies', u'healthcare', u'platforms', u'brightest', u'solve', + u'challenges', u'innovation', u'technology', u'idexx', u'intern', u'idexx', u'interns', + u'supplement', u'interns', u'teams', u'roles', u'competitive', u'interns', u'idexx', + u'interns', u'participate', u'internships', u'mentors', u'seminars', u'topics', u'leadership', + u'workshops', u'relevant', u'planning', 
u'topics', u'intern', u'presentations', u'mixers', + u'applicants', u'ineligible', u'laboratory', u'compliant', u'idexx', u'laboratories', u'healthcare', + u'innovation', u'practicing', u'veterinarians', u'diagnostic', u'technology', u'idexx', u'enhance', + u'veterinarians', u'efficiency', u'economically', u'idexx', u'worldwide', u'diagnostic', u'tests', + u'tests', u'quality', u'headquartered', u'idexx', u'laboratories', u'employs', u'customers', + u'qualifications', u'applicants', u'idexx', u'interns', u'potential', u'demonstrated', u'portfolio', + u'recommendation', u'resumes', u'marketing', u'location', u'americas', u'verification', u'validation', + u'schedule', u'overtime', u'idexx', u'laboratories', u'reviews', u'idexx', u'laboratories', + u'nasdaq', u'healthcare', u'innovation', u'practicing', u'veterinarians'], + [u'location', u'duration', u'temp', u'verification', u'validation', u'tester', u'verification', + u'validation', u'middleware', u'specifically', u'testing', u'applications', u'clinical', + u'laboratory', u'regulated', u'environment', u'responsibilities', u'complex', u'hardware', + u'testing', u'clinical', u'analyzers', u'laboratory', u'graphical', u'interfaces', u'complex', + u'sample', u'sequencing', u'protocols', u'developers', u'correction', u'tracking', + u'tool', u'timely', u'troubleshoot', u'testing', u'functional', u'manual', + u'automated', u'participate', u'ongoing'], + [u'testing', u'coverage', u'planning', u'documentation', u'testing', u'validation', + u'corrections', u'monitor', u'implementation', u'recurrence', u'operating', u'statistical', + u'quality', u'testing', u'global', u'multi', u'teams', u'travel', u'skills', u'concepts', + u'waterfall', u'agile', u'methodologies', u'debugging', u'skills', u'complex', u'automated', + u'instrumentation', u'environment', u'hardware', u'mechanical', u'components', u'tracking', + u'lifecycle', u'management', u'quality', u'organize', u'define', u'priorities', u'organize', + u'supervision', u'aggressive', u'deadlines', u'ambiguity', u'analyze', u'complex', u'situations', + u'concepts', u'technologies', u'verbal', u'skills', u'effectively', u'technical', u'clinical', + u'diverse', u'strategy', u'clinical', u'chemistry', u'analyzer', u'laboratory', u'middleware', + u'basic', u'automated', u'testing', u'biomedical', u'engineering', u'technologists', + u'laboratory', u'technology', u'availability', u'click', u'attach'], + [u'scientist', u'linux', u'asrc', u'scientist', u'linux', u'asrc', u'technology', + u'solutions', u'subsidiary', u'asrc', u'engineering', u'technology', u'contracts'], + [u'multiple', u'agencies', u'scientists', u'engineers', u'management', u'personnel', + u'allows', u'solutions', u'complex', u'aeronautics', u'aviation', u'management', u'aviation', + u'engineering', u'hughes', u'technical', u'technical', u'aviation', u'evaluation', + u'engineering', u'management', u'technical', u'terminal', u'surveillance', u'programs', + u'currently', u'scientist', u'travel', u'responsibilities', u'develops', u'technology', + u'modifies', u'technical', u'complex', u'reviews', u'draft', u'conformity', u'completeness', + u'testing', u'interface', u'hardware', u'regression', u'impact', u'reliability', + u'maintainability', u'factors', u'standardization', u'skills', u'travel', u'programming', + u'linux', u'environment', u'cisco', u'knowledge', u'terminal', u'environment', u'clearance', + u'clearance', u'input', u'output', u'digital', u'automatic', u'terminal', u'management', + u'controller', u'termination', u'testing', 
u'evaluating', u'policies', u'procedure', u'interface', + u'installation', u'verification', u'certification', u'core', u'avionic', u'programs', u'knowledge', + u'procedural', u'testing', u'interfacing', u'hardware', u'regression', u'impact', + u'reliability', u'maintainability', u'factors', u'standardization', u'missions', u'asrc', u'subsidiaries', + u'affirmative', u'employers', u'applicants', u'disability', u'veteran', u'technology', u'location', + u'airport', u'bachelor', u'schedule', u'travel', u'contributor', u'management', u'asrc', u'reviews'], + [u'technical', u'solarcity', u'niche', u'vegas', u'overview', u'resolving', u'customer', + u'clients', u'expanding', u'engineers', u'developers', u'responsibilities', u'knowledge', + u'planning', u'adapt', u'dynamic', u'environment', u'inventive', u'creative', u'solarcity', + u'lifecycle', u'responsibilities', u'technical', u'analyzing', u'diagnosing', u'troubleshooting', + u'customers', u'ticketing', u'console', u'escalate', u'knowledge', u'engineering', u'timely', + u'basic', u'phone', u'functionality', u'customer', u'tracking', u'knowledgebase', u'rotation', + u'configure', u'deployment', u'sccm', u'technical', u'deployment', u'deploy', u'hardware', + u'solarcity', u'bachelor', u'knowledge', u'dell', u'laptops', u'analytical', u'troubleshooting', + u'solving', u'skills', u'knowledge', u'databases', u'preferably', u'server', u'preferably', + u'monitoring', u'suites', u'documentation', u'procedures', u'knowledge', u'entries', u'verbal', + u'skills', u'customer', u'skills', u'competitive', u'solar', u'package', u'insurance', u'vacation', + u'savings', u'referral', u'eligibility', u'equity', u'performers', u'solarcity', u'affirmative', + u'diversity', u'workplace', u'applicants', u'orientation', u'disability', u'veteran', u'careerrookie'], + [u'embedded', u'exelis', u'junction', u'exelis', u'embedded', u'acquisition', u'networking', + u'capabilities', u'classified', u'customer', u'motivated', u'develops', u'tests', + u'innovative', u'solutions', u'minimal', u'supervision', u'paced', u'environment', u'enjoys', + u'assignments', u'interact', u'multi', u'disciplined', u'challenging', u'focused', u'embedded', + u'developments', u'spanning', u'engineering', u'lifecycle', u'specification', u'enhancement', + u'applications', u'embedded', u'freescale', u'applications', u'android', u'platforms', + u'interface', u'customers', u'developers', u'refine', u'specifications', u'architectures'], + [u'java', u'programming', u'scripts', u'python', u'debug', u'debugging', u'emulators', + u'regression', u'revisions', u'specialized', u'setups', u'capabilities', u'subversion', + u'technical', u'documentation', u'multiple', u'engineering', u'techexpousa', u'reviews'], + [u'modeler', u'semantic', u'modeling', u'models', u'skills', u'ontology', u'resource', + u'framework', u'schema', u'technologies', u'hadoop', u'warehouse', u'oracle', u'relational', + u'artifacts', u'models', u'dictionaries', u'models', u'interface', u'specifications', + u'documentation', u'harmonization', u'mappings', u'aligned', u'coordinate', u'technical', + u'peer', u'reviews', u'stakeholder', u'communities', u'impact', u'domains', u'relationships', + u'interdependencies', u'models', u'define', u'analyze', u'legacy', u'models', u'corporate', + u'databases', u'architectural', u'alignment', u'customer', u'expertise', u'harmonization', + u'modeling', u'modeling', u'consulting', u'stakeholders', u'quality', u'models', u'storage', + u'agile', u'specifically', u'focus', u'modeling', u'qualifications', 
u'bachelors', u'accredited', + u'modeler', u'encompass', u'evaluation', u'skills', u'knowledge', u'modeling', u'techniques', + u'resource', u'framework', u'schema', u'technologies', u'unified', u'modeling', u'technologies', + u'schemas', u'ontologies', u'sybase', u'knowledge', u'skills', u'interpersonal', u'skills', + u'customers', u'clearance', u'applicants', u'eligibility', u'classified', u'clearance', + u'polygraph', u'techexpousa', u'solutions', u'partnership', u'solutions', u'integration'], + [u'technologies', u'junction', u'develops', u'maintains', u'enhances', u'complex', u'diverse', + u'intensive', u'analytics', u'algorithm', u'manipulation', u'management', u'documented', + u'individually', u'reviews', u'tests', u'components', u'adherence', u'resolves', u'utilizes', + u'methodologies', u'environment', u'input', u'components', u'hardware', u'offs', u'reuse', u'cots', + u'gots', u'synthesis', u'components', u'tasks', u'individually', u'analyzes', u'modifies', + u'debugs', u'corrects', u'integrates', u'operating', u'environments', u'develops', u'queries', + u'databases', u'repositories', u'recommendations', u'improving', u'documentation', u'develops', + u'implements', u'algorithms', u'functional', u'assists', u'developing', u'executing', u'procedures', + u'components', u'reviews', u'documentation', u'solutions', u'analyzing', u'conferring', + u'users', u'engineers', u'analyzing', u'investigating', u'areas', u'adapt', u'hardware', + u'mathematical', u'models', u'predict', u'outcome', u'implement', u'complex', u'database', + u'repository', u'interfaces', u'queries', u'bachelors', u'accredited', u'substituted', + u'bachelors', u'firewalls', u'ipsec', u'vpns', u'technology', u'administering', u'servers', + u'apache', u'jboss', u'tomcat', u'developing', u'interfaces', u'firefox', u'internet', + u'explorer', u'operating', u'mainframe', u'linux', u'solaris', u'virtual', u'scripting', + u'programming', u'oriented', u'programming', u'ajax', u'script', u'procedures', u'cobol', + u'cognos', u'fusion', u'focus', u'html', u'java', u'java', u'script', u'jquery', u'perl', + u'visual', u'basic', u'powershell', u'cots', u'cots', u'oracle', u'apex', u'integration', + u'competitive', u'package', u'bonus', u'corporate', u'equity', u'tuition', u'reimbursement', + u'referral', u'bonus', u'holidays', u'insurance', u'flexible', u'disability', u'insurance'], [u'technologies', u'disability', u'accommodation', u'recruiter', u'techexpousa'], ['bank', 'river', 'shore', 'water'], ['river', 'water', 'flow', 'fast', 'tree'], @@ -56,7 +206,10 @@ def setUp(self): sstats = np.loadtxt(datapath('sstats_test.txt')) dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] - self.ldaseq = ldaseqmodel.LdaSeqModel(corpus=corpus, id2word=dictionary, num_topics=2, time_slice=[10, 10, 11], initialize='own', sstats=sstats) + self.ldaseq = ldaseqmodel.LdaSeqModel( + corpus=corpus, id2word=dictionary, num_topics=2, + time_slice=[10, 10, 11], initialize='own', sstats=sstats + ) # testing topic word proportions def testTopicWord(self): diff --git a/gensim/test/test_ldavowpalwabbit_wrapper.py b/gensim/test/test_ldavowpalwabbit_wrapper.py index 3cf6f9f6bb..d14723de59 100644 --- a/gensim/test/test_ldavowpalwabbit_wrapper.py +++ b/gensim/test/test_ldavowpalwabbit_wrapper.py @@ -33,11 +33,11 @@ # set up vars used in testing ("Deerwester" from the web tutorial) TOPIC_WORDS = [ -'cat lion leopard mouse jaguar lynx cheetah tiger kitten puppy'.split(), -'engine car wheel brakes tyre motor suspension cylinder 
exhaust clutch'.split(), -'alice bob robert tim sue rachel dave harry alex jim'.split(), -'c cplusplus go python haskell scala java ruby csharp erlang'.split(), -'eggs ham mushrooms cereal coffee beans tea juice sausages bacon'.split() + 'cat lion leopard mouse jaguar lynx cheetah tiger kitten puppy'.split(), + 'engine car wheel brakes tyre motor suspension cylinder exhaust clutch'.split(), + 'alice bob robert tim sue rachel dave harry alex jim'.split(), + 'c cplusplus go python haskell scala java ruby csharp erlang'.split(), + 'eggs ham mushrooms cereal coffee beans tea juice sausages bacon'.split() ] @@ -71,30 +71,29 @@ def test_save_load(self): """Test loading/saving LdaVowpalWabbit model.""" if not self.vw_path: # for python 2.6 return - lda = LdaVowpalWabbit(self.vw_path, - corpus=self.corpus, - passes=10, - chunksize=256, - id2word=self.dictionary, - cleanup_files=True, - alpha=0.1, - eta=0.1, - num_topics=len(TOPIC_WORDS), - random_seed=1) + lda = LdaVowpalWabbit( + self.vw_path, corpus=self.corpus, passes=10, chunksize=256, + id2word=self.dictionary, cleanup_files=True, alpha=0.1, + eta=0.1, num_topics=len(TOPIC_WORDS), random_seed=1 + ) with tempfile.NamedTemporaryFile() as fhandle: lda.save(fhandle.name) lda2 = LdaVowpalWabbit.load(fhandle.name) # ensure public fields are saved/loaded correctly - saved_fields = [lda.alpha, lda.chunksize, lda.cleanup_files, - lda.decay, lda.eta, lda.gamma_threshold, - lda.id2word, lda.num_terms, lda.num_topics, - lda.passes, lda.random_seed, lda.vw_path] - loaded_fields = [lda2.alpha, lda2.chunksize, lda2.cleanup_files, - lda2.decay, lda2.eta, lda2.gamma_threshold, - lda2.id2word, lda2.num_terms, lda2.num_topics, - lda2.passes, lda2.random_seed, lda2.vw_path] + saved_fields = [ + lda.alpha, lda.chunksize, lda.cleanup_files, + lda.decay, lda.eta, lda.gamma_threshold, + lda.id2word, lda.num_terms, lda.num_topics, + lda.passes, lda.random_seed, lda.vw_path + ] + loaded_fields = [ + lda2.alpha, lda2.chunksize, lda2.cleanup_files, + lda2.decay, lda2.eta, lda2.gamma_threshold, + lda2.id2word, lda2.num_terms, lda2.num_topics, + lda2.passes, lda2.random_seed, lda2.vw_path + ] self.assertEqual(saved_fields, loaded_fields) # ensure topic matrices are saved/loaded correctly @@ -106,16 +105,11 @@ def test_model_update(self): """Test updating existing LdaVowpalWabbit model.""" if not self.vw_path: # for python 2.6 return - lda = LdaVowpalWabbit(self.vw_path, - corpus=[self.corpus[0]], - passes=10, - chunksize=256, - id2word=self.dictionary, - cleanup_files=True, - alpha=0.1, - eta=0.1, - num_topics=len(TOPIC_WORDS), - random_seed=1) + lda = LdaVowpalWabbit( + self.vw_path, corpus=[self.corpus[0]], passes=10, chunksize=256, + id2word=self.dictionary, cleanup_files=True, alpha=0.1, + eta=0.1, num_topics=len(TOPIC_WORDS), random_seed=1 + ) lda.update(self.corpus[1:]) result = lda.log_perplexity(self.corpus) @@ -126,16 +120,10 @@ def test_perplexity(self): """Test LdaVowpalWabbit perplexity is within expected range.""" if not self.vw_path: # for python 2.6 return - lda = LdaVowpalWabbit(self.vw_path, - corpus=self.corpus, - passes=10, - chunksize=256, - id2word=self.dictionary, - cleanup_files=True, - alpha=0.1, - eta=0.1, - num_topics=len(TOPIC_WORDS), - random_seed=1) + lda = LdaVowpalWabbit( + self.vw_path, corpus=self.corpus, passes=10, chunksize=256, + id2word=self.dictionary, cleanup_files=True, alpha=0.1, + eta=0.1, num_topics=len(TOPIC_WORDS), random_seed=1) # varies, but should be between -1 and -5 result = lda.log_perplexity(self.corpus) @@ -147,16 
+135,11 @@ def test_topic_coherence(self): if not self.vw_path: # for python 2.6 return corpus, dictionary = get_corpus() - lda = LdaVowpalWabbit(self.vw_path, - corpus=corpus, - passes=10, - chunksize=256, - id2word=dictionary, - cleanup_files=True, - alpha=0.1, - eta=0.1, - num_topics=len(TOPIC_WORDS), - random_seed=1) + lda = LdaVowpalWabbit( + self.vw_path, corpus=corpus, passes=10, chunksize=256, + id2word=dictionary, cleanup_files=True, alpha=0.1, + eta=0.1, num_topics=len(TOPIC_WORDS), random_seed=1 + ) lda.print_topics(5, 10) # map words in known topic to an ID @@ -198,11 +181,13 @@ def test_corpus_to_vw(self): """Test corpus to Vowpal Wabbit format conversion.""" if not self.vw_path: # for python 2.6 return - corpus = [[(0, 5), (7, 1), (5, 3), (0, 2)], - [(7, 2), (2, 1), (3, 11)], - [(1, 1)], - [], - [(5, 2), (0, 1)]] + corpus = [ + [(0, 5), (7, 1), (5, 3), (0, 2)], + [(7, 2), (2, 1), (3, 11)], + [(1, 1)], + [], + [(5, 2), (0, 1)] + ] expected = """ | 0:5 7:1 5:3 0:2 | 7:2 2:1 3:11 diff --git a/gensim/test/test_lee.py b/gensim/test/test_lee.py index b51101c8b1..33cce71e52 100644 --- a/gensim/test/test_lee.py +++ b/gensim/test/test_lee.py @@ -63,7 +63,7 @@ def setUp(self): # read the human similarity data sim_matrix = np.loadtxt(os.path.join(pre_path, sim_file)) sim_m_size = np.shape(sim_matrix)[0] - human_sim_vector = sim_matrix[matutils.triu_indices(sim_m_size, 1)] + human_sim_vector = sim_matrix[np.triu_indices(sim_m_size, 1)] def test_corpus(self): """availability and integrity of corpus""" @@ -100,36 +100,12 @@ def test_lee(self): for i, par1 in enumerate(corpus_lsi): for j, par2 in enumerate(corpus_lsi): res[i, j] = matutils.cossim(par1, par2) - flat = res[matutils.triu_indices(len(corpus), 1)] + flat = res[np.triu_indices(len(corpus), 1)] cor = np.corrcoef(flat, human_sim_vector)[0, 1] - logging.info("LSI correlation coefficient is %s" % cor) + logging.info("LSI correlation coefficient is %s", cor) self.assertTrue(cor > 0.6) - # def test_lee_mallet(self): - # global bg_corpus, corpus, bg_corpus2, corpus2 - - # # create a dictionary and corpus (bag of words) - # dictionary = corpora.Dictionary(bg_corpus2) - # bg_corpus = [dictionary.doc2bow(text) for text in bg_corpus2] - # corpus = [dictionary.doc2bow(text) for text in corpus2] - - # # initialize an LDA transformation from background corpus - # lda = models.wrappers.LdaMallet('/Users/kofola/Downloads/mallet-2.0.7/bin/mallet', - # corpus=bg_corpus, id2word=dictionary, num_topics=200, optimize_interval=10) - # corpus_lda = lda[corpus] - - # # compute pairwise similarity matrix and extract upper triangular - # res = np.zeros((len(corpus), len(corpus))) - # for i, par1 in enumerate(corpus_lda): - # for j, par2 in enumerate(corpus_lda): - # res[i, j] = matutils.cossim(par1, par2) - # flat = res[matutils.triu_indices(len(corpus), 1)] - - # cor = np.corrcoef(flat, human_sim_vector)[0, 1] - # logging.info("LDA correlation coefficient is %s" % cor) - # self.assertTrue(cor > 0.35) - if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) diff --git a/gensim/test/test_logentropy_model.py b/gensim/test/test_logentropy_model.py index 07f982dad9..22ca09be0d 100644 --- a/gensim/test/test_logentropy_model.py +++ b/gensim/test/test_logentropy_model.py @@ -25,15 +25,17 @@ # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 
'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] @@ -56,9 +58,11 @@ def testTransform(self): doc = list(self.corpus_ok)[0] transformed = model[doc] - expected = [(0, 0.3748900964125389), - (1, 0.30730215324230725), - (3, 1.20941755462856)] + expected = [ + (0, 0.3748900964125389), + (1, 0.30730215324230725), + (3, 1.20941755462856) + ] self.assertTrue(np.allclose(transformed, expected)) def testPersistence(self): @@ -78,7 +82,6 @@ def testPersistenceCompressed(self): self.assertTrue(model.entr == model2.entr) tstvec = [] self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) -# endclass TestLogEntropyModel if __name__ == '__main__': diff --git a/gensim/test/test_lsimodel.py b/gensim/test/test_lsimodel.py index e2c32bda66..6a1d2ef995 100644 --- a/gensim/test/test_lsimodel.py +++ b/gensim/test/test_lsimodel.py @@ -21,7 +21,7 @@ from gensim import matutils from gensim.corpora import mmcorpus, Dictionary from gensim.models import lsimodel -from gensim.test import basetests +from gensim.test import basetmtests module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder @@ -31,15 +31,17 @@ def datapath(fname): # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] @@ -49,7 +51,7 @@ def testfile(): return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') -class TestLsiModel(unittest.TestCase, basetests.TestBaseTopicModel): +class TestLsiModel(unittest.TestCase, basetmtests.TestBaseTopicModel): def setUp(self): self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) self.model = lsimodel.LsiModel(self.corpus, num_topics=2) @@ -84,7 +86,8 @@ def testCorpusTransform(self): [0.01274618, -0.49016181], [0.04888203, -1.11294699], [0.08063836, -1.56345594], - [0.27381003, -1.34694159]]) + [0.27381003, -1.34694159] + ]) self.assertTrue(np.allclose(abs(got), abs(expected))) # must equal up to sign def testOnlineTransform(self): @@ -178,7 +181,7 @@ def testDocsProcessed(self): self.assertEqual(self.model.docs_processed, 9) self.assertEqual(self.model.docs_processed, self.corpus.num_docs) - def testGetTopics(self): + def test_get_topics(self): topics = self.model.get_topics() vocab_size = len(self.model.id2word) for topic in topics: @@ -188,8 +191,6 @@ def 
testGetTopics(self): # LSI topics are not probability distributions # self.assertAlmostEqual(np.sum(topic), 1.0, 5) -# endclass TestLsiModel - if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) diff --git a/gensim/test/test_normmodel.py b/gensim/test/test_normmodel.py index 77221b4a4d..339680d085 100644 --- a/gensim/test/test_normmodel.py +++ b/gensim/test/test_normmodel.py @@ -65,18 +65,22 @@ def test_sparseCSRInput_l1(self): def test_numpyndarrayInput_l1(self): """Test for np ndarray input for l1 transformation""" - ndarray_matrix = np.array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]) + ndarray_matrix = np.array([ + [1, 0, 2], + [0, 0, 3], + [4, 5, 6] + ]) normalized = self.model_l1.normalize(ndarray_matrix) # Check if output is of same type self.assertTrue(isinstance(normalized, np.ndarray)) # Check if output is correct - expected = np.array([[0.04761905, 0., 0.0952381], - [0., 0., 0.14285714], - [0.19047619, 0.23809524, 0.28571429]]) + expected = np.array([ + [0.04761905, 0., 0.0952381], + [0., 0., 0.14285714], + [0.19047619, 0.23809524, 0.28571429] + ]) self.assertTrue(np.allclose(normalized, expected)) # Test if error is raised on unsupported input type @@ -101,25 +105,31 @@ def test_sparseCSRInput_l2(self): self.assertTrue(issparse(normalized)) # Check if output is correct - expected = np.array([[0.10482848, 0., 0.20965697], - [0., 0., 0.31448545], - [0.41931393, 0.52414242, 0.6289709]]) + expected = np.array([ + [0.10482848, 0., 0.20965697], + [0., 0., 0.31448545], + [0.41931393, 0.52414242, 0.6289709] + ]) self.assertTrue(np.allclose(normalized.toarray(), expected)) def test_numpyndarrayInput_l2(self): """Test for np ndarray input for l2 transformation""" - ndarray_matrix = np.array([[1, 0, 2], - [0, 0, 3], - [4, 5, 6]]) + ndarray_matrix = np.array([ + [1, 0, 2], + [0, 0, 3], + [4, 5, 6] + ]) normalized = self.model_l2.normalize(ndarray_matrix) # Check if output is of same type self.assertTrue(isinstance(normalized, np.ndarray)) # Check if output is correct - expected = np.array([[0.10482848, 0., 0.20965697], - [0., 0., 0.31448545], - [0.41931393, 0.52414242, 0.6289709]]) + expected = np.array([ + [0.10482848, 0., 0.20965697], + [0., 0., 0.31448545], + [0.41931393, 0.52414242, 0.6289709] + ]) self.assertTrue(np.allclose(normalized, expected)) # Test if error is raised on unsupported input type diff --git a/gensim/test/test_parsing.py b/gensim/test/test_parsing.py index fd0429e9f6..02ca13fb6b 100644 --- a/gensim/test/test_parsing.py +++ b/gensim/test/test_parsing.py @@ -36,8 +36,7 @@ for many searching purposes, a little fuzziness would help. 
""" -dataset = map(lambda x: strip_punctuation2(x.lower()), - [doc1, doc2, doc3, doc4]) +dataset = [strip_punctuation2(x.lower()) for x in [doc1, doc2, doc3, doc4]] # doc1 and doc2 have class 0, doc3 and doc4 avec class 1 classes = np.array([[1, 0], [1, 0], [0, 1], [0, 1]]) @@ -45,40 +44,33 @@ class TestPreprocessing(unittest.TestCase): def testStripNumeric(self): - self.assertEqual(strip_numeric("salut les amis du 59"), - "salut les amis du ") + self.assertEqual(strip_numeric("salut les amis du 59"), "salut les amis du ") def testStripShort(self): - self.assertEqual(strip_short("salut les amis du 59", 3), - "salut les amis") + self.assertEqual(strip_short("salut les amis du 59", 3), "salut les amis") def testStripTags(self): - self.assertEqual(strip_tags("Hello World!"), - "Hello World!") + self.assertEqual(strip_tags("Hello World!"), "Hello World!") def testStripMultipleWhitespaces(self): - self.assertEqual(strip_multiple_whitespaces("salut les\r\nloulous!"), - "salut les loulous!") + self.assertEqual(strip_multiple_whitespaces("salut les\r\nloulous!"), "salut les loulous!") def testStripNonAlphanum(self): - self.assertEqual(strip_non_alphanum("toto nf-kappa titi"), - "toto nf kappa titi") + self.assertEqual(strip_non_alphanum("toto nf-kappa titi"), "toto nf kappa titi") def testSplitAlphanum(self): - self.assertEqual(split_alphanum("toto diet1 titi"), - "toto diet 1 titi") - self.assertEqual(split_alphanum("toto 1diet titi"), - "toto 1 diet titi") + self.assertEqual(split_alphanum("toto diet1 titi"), "toto diet 1 titi") + self.assertEqual(split_alphanum("toto 1diet titi"), "toto 1 diet titi") def testStripStopwords(self): - self.assertEqual(remove_stopwords("the world is square"), - "world square") + self.assertEqual(remove_stopwords("the world is square"), "world square") def testStemText(self): - target = "while it is quit us to be abl to search a larg " + \ - "collect of document almost instantli for a joint occurr " + \ - "of a collect of exact words, for mani search purposes, " + \ - "a littl fuzzi would help." + target = \ + "while it is quit us to be abl to search a larg " + \ + "collect of document almost instantli for a joint occurr " + \ + "of a collect of exact words, for mani search purposes, " + \ + "a littl fuzzi would help." self.assertEqual(stem_text(doc5), target) diff --git a/gensim/test/test_phrases.py b/gensim/test/test_phrases.py index 5397d6e4c3..868947defb 100644 --- a/gensim/test/test_phrases.py +++ b/gensim/test/test_phrases.py @@ -133,11 +133,7 @@ def testExportPhrases(self): for phrase, score in bigram.export_phrases(sentences): seen_bigrams.add(phrase) - assert seen_bigrams == set([ - b'response time', - b'graph minors', - b'human interface' - ]) + assert seen_bigrams == {b'response time', b'graph minors', b'human interface'} def testMultipleBigramsSingleEntry(self): """ a single entry should produce multiple bigrams. 
""" @@ -149,10 +145,7 @@ def testMultipleBigramsSingleEntry(self): for phrase, score in bigram.export_phrases(test_sentences): seen_bigrams.add(phrase) - assert seen_bigrams == set([ - b'graph minors', - b'human interface' - ]) + assert seen_bigrams == {b'graph minors', b'human interface'} def testScoringDefault(self): """ test the default scoring, from the mikolov word2vec paper """ @@ -164,10 +157,10 @@ def testScoringDefault(self): for phrase, score in bigram.export_phrases(test_sentences): seen_scores.add(round(score, 3)) - assert seen_scores == set([ + assert seen_scores == { 5.167, # score for graph minors 3.444 # score for human interface - ]) + } def testScoringNpmi(self): """ test normalized pointwise mutual information scoring """ @@ -179,10 +172,10 @@ def testScoringNpmi(self): for phrase, score in bigram.export_phrases(test_sentences): seen_scores.add(round(score, 3)) - assert seen_scores == set([ + assert seen_scores == { .882, # score for graph minors .714 # score for human interface - ]) + } def testBadParameters(self): """Test the phrases module with bad parameters.""" diff --git a/gensim/test/test_rpmodel.py b/gensim/test/test_rpmodel.py index 2de5dd6546..94c1abce84 100644 --- a/gensim/test/test_rpmodel.py +++ b/gensim/test/test_rpmodel.py @@ -26,15 +26,17 @@ # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] @@ -80,7 +82,6 @@ def testPersistenceCompressed(self): self.assertTrue(np.allclose(model.projection, model2.projection)) tstvec = [] self.assertTrue(np.allclose(model[tstvec], model2[tstvec])) # try projecting an empty vector -# endclass TestRpModel if __name__ == '__main__': diff --git a/gensim/test/test_segmentation.py b/gensim/test/test_segmentation.py index a4c9356a26..512121a055 100644 --- a/gensim/test/test_segmentation.py +++ b/gensim/test/test_segmentation.py @@ -20,30 +20,40 @@ class TestSegmentation(unittest.TestCase): def setUp(self): - self.topics = [array([9, 4, 6]), array([9, 10, 7]), array([5, 2, 7])] + self.topics = [ + array([9, 4, 6]), + array([9, 10, 7]), + array([5, 2, 7]) + ] def testSOnePre(self): """Test s_one_pre segmentation.""" actual = segmentation.s_one_pre(self.topics) - expected = [[(4, 9), (6, 9), (6, 4)], - [(10, 9), (7, 9), (7, 10)], - [(2, 5), (7, 5), (7, 2)]] + expected = [ + [(4, 9), (6, 9), (6, 4)], + [(10, 9), (7, 9), (7, 10)], + [(2, 5), (7, 5), (7, 2)] + ] self.assertTrue(np.allclose(actual, expected)) def testSOneOne(self): """Test s_one_one segmentation.""" actual = segmentation.s_one_one(self.topics) - expected = [[(9, 4), (9, 6), (4, 9), (4, 6), (6, 9), (6, 4)], - [(9, 10), (9, 7), (10, 9), (10, 7), (7, 9), (7, 10)], - [(5, 2), (5, 7), (2, 5), (2, 7), (7, 5), (7, 2)]] + expected = [ + [(9, 4), (9, 6), (4, 9), (4, 6), (6, 9), (6, 4)], + [(9, 10), (9, 7), (10, 9), (10, 7), (7, 9), (7, 
10)], + [(5, 2), (5, 7), (2, 5), (2, 7), (7, 5), (7, 2)] + ] self.assertTrue(np.allclose(actual, expected)) def testSOneSet(self): """Test s_one_set segmentation.""" actual = segmentation.s_one_set(self.topics) - expected = [[(9, array([9, 4, 6])), (4, array([9, 4, 6])), (6, array([9, 4, 6]))], - [(9, array([9, 10, 7])), (10, array([9, 10, 7])), (7, array([9, 10, 7]))], - [(5, array([5, 2, 7])), (2, array([5, 2, 7])), (7, array([5, 2, 7]))]] + expected = [ + [(9, array([9, 4, 6])), (4, array([9, 4, 6])), (6, array([9, 4, 6]))], + [(9, array([9, 10, 7])), (10, array([9, 10, 7])), (7, array([9, 10, 7]))], + [(5, array([5, 2, 7])), (2, array([5, 2, 7])), (7, array([5, 2, 7]))] + ] for s_i in range(len(actual)): for j in range(len(actual[s_i])): self.assertEqual(actual[s_i][j][0], expected[s_i][j][0]) diff --git a/gensim/test/test_sharded_corpus.py b/gensim/test/test_sharded_corpus.py index 871048ea4e..cc70ee8f49 100644 --- a/gensim/test/test_sharded_corpus.py +++ b/gensim/test/test_sharded_corpus.py @@ -87,8 +87,7 @@ def test_sparse_serialization(self): no_exception = True try: - dataset = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, # noqa:F841 - dim=self.dim, sparse_serialization=True) + ShardedCorpus(self.tmp_fname, self.data, shardsize=100, dim=self.dim, sparse_serialization=True) except Exception: no_exception = False raise @@ -97,9 +96,10 @@ def test_sparse_serialization(self): def test_getitem_dense2dense(self): - corpus = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=False, - sparse_retrieval=False) + corpus = ShardedCorpus( + self.tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=False, sparse_retrieval=False + ) item = corpus[3] self.assertTrue(isinstance(item, np.ndarray)) @@ -117,9 +117,10 @@ def test_getitem_dense2dense(self): def test_getitem_dense2sparse(self): - corpus = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=False, - sparse_retrieval=True) + corpus = ShardedCorpus( + self.tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=False, sparse_retrieval=True + ) item = corpus[3] self.assertTrue(isinstance(item, sparse.csr_matrix)) @@ -138,13 +139,15 @@ def test_getitem_dense2sparse(self): def test_getitem_sparse2sparse(self): sp_tmp_fname = self.tmp_fname + '.sparse' - corpus = ShardedCorpus(sp_tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=True, - sparse_retrieval=True) + corpus = ShardedCorpus( + sp_tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=True, sparse_retrieval=True + ) - dense_corpus = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=False, - sparse_retrieval=True) + dense_corpus = ShardedCorpus( + self.tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=False, sparse_retrieval=True + ) item = corpus[3] self.assertTrue(isinstance(item, sparse.csr_matrix)) @@ -168,13 +171,15 @@ def test_getitem_sparse2sparse(self): def test_getitem_sparse2dense(self): sp_tmp_fname = self.tmp_fname + '.sparse' - corpus = ShardedCorpus(sp_tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=True, - sparse_retrieval=False) + corpus = ShardedCorpus( + sp_tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=True, sparse_retrieval=False + ) - dense_corpus = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=False, - sparse_retrieval=False) + 
dense_corpus = ShardedCorpus( + self.tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=False, sparse_retrieval=False + ) item = corpus[3] self.assertTrue(isinstance(item, np.ndarray)) @@ -195,9 +200,10 @@ def test_getitem_sparse2dense(self): def test_getitem_dense2gensim(self): - corpus = ShardedCorpus(self.tmp_fname, self.data, shardsize=100, - dim=self.dim, sparse_serialization=False, - gensim=True) + corpus = ShardedCorpus( + self.tmp_fname, self.data, shardsize=100, dim=self.dim, + sparse_serialization=False, gensim=True + ) item = corpus[3] self.assertTrue(isinstance(item, list)) @@ -211,8 +217,7 @@ def test_getitem_dense2gensim(self): self.assertTrue(isinstance(dslice[0][0], tuple)) iscorp, _ = is_corpus(dslice) - self.assertTrue(iscorp, "Is the object returned by slice notation " - "a gensim corpus?") + self.assertTrue(iscorp, "Is the object returned by slice notation a gensim corpus?") ilist = corpus[[2, 3, 4, 5]] self.assertTrue(next(ilist) == corpus[2]) @@ -235,8 +240,7 @@ def test_getitem_dense2gensim(self): str(dslice[i][j]))) iscorp, _ = is_corpus(ilist) - self.assertTrue(iscorp, "Is the object returned by list notation " - "a gensim corpus?") + self.assertTrue(iscorp, "Is the object returned by list notation a gensim corpus?") def test_resize(self): @@ -252,8 +256,6 @@ def test_resize(self): fname = dataset._shard_name(n) self.assertTrue(os.path.isfile(fname)) -############################################################################## - if __name__ == '__main__': suite = unittest.TestSuite() diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 9ce5263df7..93c0f8a3f7 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -36,20 +36,21 @@ # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] -sentences = [doc2vec.TaggedDocument(words, [i]) - for i, words in enumerate(texts)] +sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(texts)] def testfile(): @@ -78,7 +79,7 @@ def testFull(self, num_best=None, shardsize=100): [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.70710677, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026, 0.57735026], [0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.57735026], - ], dtype=numpy.float32) + ], dtype=numpy.float32) # HACK: dictionary can be in different order, so compare in sorted order self.assertTrue(numpy.allclose(sorted(expected.flat), sorted(index.index.flat))) index.num_best = num_best @@ -137,15 +138,17 @@ def testChunking(self): [0.99999994, 0.23570226, 0.28867513, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.0], [0.23570226, 1.0, 0.40824831, 0.33333334, 0.70710677, 0.0, 0.0, 0.0, 0.23570226], [0.28867513, 0.40824831, 1.0, 0.61237246, 
0.28867513, 0.0, 0.0, 0.0, 0.0] - ], dtype=numpy.float32) + ], dtype=numpy.float32) self.assertTrue(numpy.allclose(expected, sims)) # test the same thing but with num_best index.num_best = 3 sims = index[query] - expected = [[(0, 0.99999994), (2, 0.28867513), (1, 0.23570226)], - [(1, 1.0), (4, 0.70710677), (2, 0.40824831)], - [(2, 1.0), (3, 0.61237246), (1, 0.40824831)]] + expected = [ + [(0, 0.99999994), (2, 0.28867513), (1, 0.23570226)], + [(1, 1.0), (4, 0.70710677), (2, 0.40824831)], + [(2, 1.0), (3, 0.61237246), (1, 0.40824831)] + ] self.assertTrue(numpy.allclose(expected, sims)) if self.cls == similarities.Similarity: index.destroy() @@ -166,7 +169,7 @@ def testIter(self): [0.0, 0.0, 0.0, 0.0, 0.0, 0.70710677, 0.99999994, 0.81649655, 0.40824828], [0.0, 0.0, 0.0, 0.0, 0.0, 0.57735026, 0.81649655, 0.99999994, 0.66666663], [0.0, 0.23570226, 0.0, 0.0, 0.0, 0.0, 0.40824828, 0.66666663, 0.99999994] - ], dtype=numpy.float32) + ], dtype=numpy.float32) self.assertTrue(numpy.allclose(expected, sims)) if self.cls == similarities.Similarity: index.destroy() diff --git a/gensim/test/test_similarity_metrics.py b/gensim/test/test_similarity_metrics.py index 23bf0e60f7..27066ff09d 100644 --- a/gensim/test/test_similarity_metrics.py +++ b/gensim/test/test_similarity_metrics.py @@ -24,15 +24,17 @@ datapath = lambda fname: os.path.join(module_path, 'test_data', fname) # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] diff --git a/gensim/test/test_sklearn_api.py b/gensim/test/test_sklearn_api.py index 0e17905c2b..07411aa9b9 100644 --- a/gensim/test/test_sklearn_api.py +++ b/gensim/test/test_sklearn_api.py @@ -43,10 +43,19 @@ ] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] -author2doc = {'john': [0, 1, 2, 3, 4, 5, 6], 'jane': [2, 3, 4, 5, 6, 7, 8], 'jack': [0, 2, 4, 6, 8], 'jill': [1, 3, 5, 7]} +author2doc = { + 'john': [0, 1, 2, 3, 4, 5, 6], + 'jane': [2, 3, 4, 5, 6, 7, 8], + 'jack': [0, 2, 4, 6, 8], + 'jill': [1, 3, 5, 7] +} texts_new = texts[0:3] -author2doc_new = {'jill': [0], 'bob': [0, 1], 'sally': [1, 2]} +author2doc_new = { + 'jill': [0], + 'bob': [0, 1], + 'sally': [1, 2] +} dictionary_new = Dictionary(texts_new) corpus_new = [dictionary_new.doc2bow(text) for text in texts_new] @@ -91,11 +100,16 @@ ['geometry', 'is', 'the', 'study', 'of', 'shape'], ['algebra', 'is', 'the', 'study', 'of', 'generalizations', 'of', 'arithmetic', 'operations'], ['differential', 'calculus', 'is', 'related', 'to', 'rates', 'of', 'change', 'and', 'slopes', 'of', 'curves'], - ['integral', 'calculus', 'is', 'realted', 'to', 'accumulation', 'of', 'quantities', 'and', 'the', 'areas', 'under', 'and', 'between', 'curves'], - ['physics', 'is', 'the', 'natural', 'science', 'that', 'involves', 'the', 'study', 'of', 'matter', 'and', 'its', 'motion', 
'and', 'behavior', 'through', 'space', 'and', 'time'], + ['integral', 'calculus', 'is', 'realted', 'to', 'accumulation', 'of', 'quantities', 'and', + 'the', 'areas', 'under', 'and', 'between', 'curves'], + ['physics', 'is', 'the', 'natural', 'science', 'that', 'involves', 'the', 'study', 'of', 'matter', + 'and', 'its', 'motion', 'and', 'behavior', 'through', 'space', 'and', 'time'], ['the', 'main', 'goal', 'of', 'physics', 'is', 'to', 'understand', 'how', 'the', 'universe', 'behaves'], - ['physics', 'also', 'makes', 'significant', 'contributions', 'through', 'advances', 'in', 'new', 'technologies', 'that', 'arise', 'from', 'theoretical', 'breakthroughs'], - ['advances', 'in', 'the', 'understanding', 'of', 'electromagnetism', 'or', 'nuclear', 'physics', 'led', 'directly', 'to', 'the', 'development', 'of', 'new', 'products', 'that', 'have', 'dramatically', 'transformed', 'modern', 'day', 'society'] + ['physics', 'also', 'makes', 'significant', 'contributions', 'through', 'advances', 'in', 'new', + 'technologies', 'that', 'arise', 'from', 'theoretical', 'breakthroughs'], + ['advances', 'in', 'the', 'understanding', 'of', 'electromagnetism', 'or', 'nuclear', 'physics', + 'led', 'directly', 'to', 'the', 'development', 'of', 'new', 'products', 'that', 'have', 'dramatically', + 'transformed', 'modern', 'day', 'society'] ] d2v_sentences = [models.doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(w2v_texts)] @@ -129,7 +143,9 @@ class TestLdaWrapper(unittest.TestCase): def setUp(self): numpy.random.seed(0) # set fixed seed to get similar values everytime - self.model = LdaTransformer(id2word=dictionary, num_topics=2, passes=100, minimum_probability=0, random_state=numpy.random.seed(0)) + self.model = LdaTransformer( + id2word=dictionary, num_topics=2, passes=100, minimum_probability=0, random_state=numpy.random.seed(0) + ) self.model.fit(corpus) def testTransform(self): @@ -157,11 +173,16 @@ def testPartialFit(self): def testConsistencyWithGensimModel(self): # training an LdaTransformer with `num_topics`=10 - self.model = LdaTransformer(id2word=dictionary, num_topics=10, passes=100, minimum_probability=0, random_state=numpy.random.seed(0)) + self.model = LdaTransformer( + id2word=dictionary, num_topics=10, passes=100, minimum_probability=0, random_state=numpy.random.seed(0) + ) self.model.fit(corpus) # training a Gensim LdaModel with the same params - gensim_ldamodel = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=10, passes=100, minimum_probability=0, random_state=numpy.random.seed(0)) + gensim_ldamodel = models.LdaModel( + corpus=corpus, id2word=dictionary, num_topics=10, passes=100, + minimum_probability=0, random_state=numpy.random.seed(0) + ) texts_new = ['graph', 'eulerian'] bow = self.model.id2word.doc2bow(texts_new) @@ -190,7 +211,7 @@ def testPipeline(self): uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') cache = pickle.loads(uncompressed_content) data = cache - id2word = Dictionary(map(lambda x: x.split(), data.data)) + id2word = Dictionary([x.split() for x in data.data]) corpus = [id2word.doc2bow(i.split()) for i in data.data] numpy.random.mtrand.RandomState(1) # set seed for getting same result clf = linear_model.LogisticRegression(penalty='l2', C=0.1) @@ -238,7 +259,10 @@ def testPersistence(self): self.assertTrue(passed) def testModelNotFitted(self): - lda_wrapper = LdaTransformer(id2word=dictionary, num_topics=2, passes=100, minimum_probability=0, random_state=numpy.random.seed(0)) + lda_wrapper = LdaTransformer( + id2word=dictionary, 
num_topics=2, passes=100, + minimum_probability=0, random_state=numpy.random.seed(0) + ) texts_new = ['graph', 'eulerian'] bow = lda_wrapper.id2word.doc2bow(texts_new) self.assertRaises(NotFittedError, lda_wrapper.transform, bow) @@ -280,7 +304,7 @@ def testPipeline(self): uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') cache = pickle.loads(uncompressed_content) data = cache - id2word = Dictionary(map(lambda x: x.split(), data.data)) + id2word = Dictionary([x.split() for x in data.data]) corpus = [id2word.doc2bow(i.split()) for i in data.data] numpy.random.mtrand.RandomState(1) # set seed for getting same result clf = linear_model.LogisticRegression(penalty='l2', C=0.1) @@ -336,14 +360,14 @@ def testModelNotFitted(self): class TestLdaSeqWrapper(unittest.TestCase): def setUp(self): - self.model = LdaSeqTransformer(id2word=dictionary_ldaseq, num_topics=2, time_slice=[10, 10, 11], initialize='gensim') + self.model = LdaSeqTransformer( + id2word=dictionary_ldaseq, num_topics=2, time_slice=[10, 10, 11], initialize='gensim' + ) self.model.fit(corpus_ldaseq) def testTransform(self): # transforming two documents - docs = [] - docs.append(list(corpus_ldaseq)[0]) - docs.append(list(corpus_ldaseq)[1]) + docs = [list(corpus_ldaseq)[0], list(corpus_ldaseq)[1]] transformed_vecs = self.model.transform(docs) self.assertEqual(transformed_vecs.shape[0], 2) self.assertEqual(transformed_vecs.shape[1], self.model.num_topics) @@ -363,7 +387,7 @@ def testPipeline(self): data = cache test_data = data.data[0:2] test_target = data.target[0:2] - id2word = Dictionary(map(lambda x: x.split(), test_data)) + id2word = Dictionary([x.split() for x in test_data]) corpus = [id2word.doc2bow(i.split()) for i in test_data] model = LdaSeqTransformer(id2word=id2word, num_topics=2, time_slice=[1, 1, 1], initialize='gensim') clf = linear_model.LogisticRegression(penalty='l2', C=0.1) @@ -412,9 +436,7 @@ def setUp(self): def testTransform(self): # tranform two documents - docs = [] - docs.append(list(self.corpus)[0]) - docs.append(list(self.corpus)[1]) + docs = [list(self.corpus)[0], list(self.corpus)[1]] matrix = self.model.transform(docs) self.assertEqual(matrix.shape[0], 2) self.assertEqual(matrix.shape[1], self.model.num_topics) @@ -433,7 +455,7 @@ def testPipeline(self): uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') cache = pickle.loads(uncompressed_content) data = cache - id2word = Dictionary(map(lambda x: x.split(), data.data)) + id2word = Dictionary([x.split() for x in data.data]) corpus = [id2word.doc2bow(i.split()) for i in data.data] numpy.random.mtrand.RandomState(1) # set seed for getting same result clf = linear_model.LogisticRegression(penalty='l2', C=0.1) @@ -514,11 +536,13 @@ def testPipeline(self): class_dict = {'mathematics': 1, 'physics': 0} train_data = [ - ('calculus', 'mathematics'), ('mathematical', 'mathematics'), ('geometry', 'mathematics'), ('operations', 'mathematics'), ('curves', 'mathematics'), - ('natural', 'physics'), ('nuclear', 'physics'), ('science', 'physics'), ('electromagnetism', 'physics'), ('natural', 'physics') + ('calculus', 'mathematics'), ('mathematical', 'mathematics'), + ('geometry', 'mathematics'), ('operations', 'mathematics'), + ('curves', 'mathematics'), ('natural', 'physics'), ('nuclear', 'physics'), + ('science', 'physics'), ('electromagnetism', 'physics'), ('natural', 'physics') ] - train_input = list(map(lambda x: x[0], train_data)) - train_target = list(map(lambda x: class_dict[x[1]], train_data)) + train_input = [x[0] for x in 
train_data] + train_target = [class_dict[x[1]] for x in train_data] clf = linear_model.LogisticRegression(penalty='l2', C=0.1) clf.fit(model.transform(train_input), train_target) @@ -648,10 +672,7 @@ def setUp(self): def testTransform(self): # tranform multiple documents - docs = [] - docs.append(w2v_texts[0]) - docs.append(w2v_texts[1]) - docs.append(w2v_texts[2]) + docs = [w2v_texts[0], w2v_texts[1], w2v_texts[2]] matrix = self.model.transform(docs) self.assertEqual(matrix.shape[0], 3) self.assertEqual(matrix.shape[1], self.model.size) @@ -682,8 +703,8 @@ def testPipeline(self): (['calculus', 'mathematical'], 'mathematics'), (['geometry', 'operations', 'curves'], 'mathematics'), (['natural', 'nuclear'], 'physics'), (['science', 'electromagnetism', 'natural'], 'physics') ] - train_input = list(map(lambda x: x[0], train_data)) - train_target = list(map(lambda x: class_dict[x[1]], train_data)) + train_input = [x[0] for x in train_data] + train_target = [class_dict[x[1]] for x in train_data] clf = linear_model.LogisticRegression(penalty='l2', C=0.1) clf.fit(model.transform(train_input), train_target) @@ -737,7 +758,7 @@ def testTransform(self): doc = ['computer system interface time computer system'] bow_vec = self.model.transform(doc)[0] expected_values = [1, 1, 2, 2] # comparing only the word-counts - values = list(map(lambda x: x[1], bow_vec)) + values = [x[1] for x in bow_vec] self.assertEqual(sorted(expected_values), sorted(values)) def testSetGetParams(self): @@ -794,8 +815,11 @@ def testTransform(self): # tranform multiple documents docs = [corpus[0], corpus[1]] transformed_docs = self.model.transform(docs) - expected_docs = [[(0, 0.5773502691896257), (1, 0.5773502691896257), (2, 0.5773502691896257)], - [(3, 0.44424552527467476), (4, 0.44424552527467476), (5, 0.3244870206138555), (6, 0.44424552527467476), (7, 0.3244870206138555), (8, 0.44424552527467476)]] + expected_docs = [ + [(0, 0.5773502691896257), (1, 0.5773502691896257), (2, 0.5773502691896257)], + [(3, 0.44424552527467476), (4, 0.44424552527467476), (5, 0.3244870206138555), + (6, 0.44424552527467476), (7, 0.3244870206138555), (8, 0.44424552527467476)] + ] self.assertTrue(numpy.allclose(transformed_docs[0], expected_docs[0])) self.assertTrue(numpy.allclose(transformed_docs[1], expected_docs[1])) @@ -815,7 +839,7 @@ def testPipeline(self): uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') cache = pickle.loads(uncompressed_content) data = cache - id2word = Dictionary(map(lambda x: x.split(), data.data)) + id2word = Dictionary([x.split() for x in data.data]) corpus = [id2word.doc2bow(i.split()) for i in data.data] tfidf_model = TfIdfTransformer() tfidf_model.fit(corpus) @@ -854,14 +878,20 @@ def testTransform(self): # tranform one document doc = self.corpus[0] transformed_doc = self.model.transform(doc) - expected_doc = [[0.81043386270128193, 0.049357139518070477, 0.035840906753517532, 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148]] + expected_doc = [ + [0.81043386270128193, 0.049357139518070477, 0.035840906753517532, + 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148] + ] self.assertTrue(numpy.allclose(transformed_doc, expected_doc, atol=1e-2)) # tranform multiple documents docs = [self.corpus[0], self.corpus[1]] transformed_docs = self.model.transform(docs) - expected_docs = [[0.81043386270128193, 0.049357139518070477, 0.035840906753517532, 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148], - 
[0.03795908, 0.39542609, 0.50650585, 0.0151082, 0.01132749, 0., 0.]] + expected_docs = [ + [0.81043386270128193, 0.049357139518070477, 0.035840906753517532, + 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148], + [0.03795908, 0.39542609, 0.50650585, 0.0151082, 0.01132749, 0., 0.] + ] self.assertTrue(numpy.allclose(transformed_docs[0], expected_docs[0], atol=1e-2)) self.assertTrue(numpy.allclose(transformed_docs[1], expected_docs[1], atol=1e-2)) @@ -881,7 +911,7 @@ def testPipeline(self): uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') cache = pickle.loads(uncompressed_content) data = cache - id2word = Dictionary(map(lambda x: x.split(), data.data)) + id2word = Dictionary([x.split() for x in data.data]) corpus = [id2word.doc2bow(i.split()) for i in data.data] model = HdpTransformer(id2word=id2word) clf = linear_model.LogisticRegression(penalty='l2', C=0.1) diff --git a/gensim/test/test_tfidfmodel.py b/gensim/test/test_tfidfmodel.py index bb00b5482d..65e2939857 100644 --- a/gensim/test/test_tfidfmodel.py +++ b/gensim/test/test_tfidfmodel.py @@ -25,15 +25,17 @@ # set up vars used in testing ("Deerwester" from the web tutorial) -texts = [['human', 'interface', 'computer'], - ['survey', 'user', 'computer', 'system', 'response', 'time'], - ['eps', 'user', 'interface', 'system'], - ['system', 'human', 'system', 'eps'], - ['user', 'response', 'time'], - ['trees'], - ['graph', 'trees'], - ['graph', 'minors', 'trees'], - ['graph', 'minors', 'survey']] +texts = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] +] dictionary = Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] diff --git a/gensim/test/test_wikicorpus.py b/gensim/test/test_wikicorpus.py index 9bdbcbdb8d..ca81d6e51a 100644 --- a/gensim/test/test_wikicorpus.py +++ b/gensim/test/test_wikicorpus.py @@ -28,7 +28,7 @@ class TestWikiCorpus(unittest.TestCase): # #TODO: sporadic failure to be investigated # def test_get_texts_returns_generator_of_lists(self): - # logger.debug("Current Python Version is " + str(sys.version_info)) + # logger.debug("Current Python Version is %s", str(sys.version_info)) # if sys.version_info < (2, 7, 0): # return # diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index 29ae713b90..81123ccd7a 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -140,14 +140,18 @@ def test_sg_neg_online(self): def test_cbow_hs_online(self): """Test CBOW w/ hierarchical softmax""" - model = word2vec.Word2Vec(sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, - min_count=3, iter=10, seed=42, workers=2) + model = word2vec.Word2Vec( + sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, + min_count=3, iter=10, seed=42, workers=2 + ) self.onlineSanity(model) def test_cbow_neg_online(self): """Test CBOW w/ negative sampling""" - model = word2vec.Word2Vec(sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, - min_count=5, iter=10, seed=42, workers=2, sample=0) + model = word2vec.Word2Vec( + sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, + min_count=5, iter=10, seed=42, workers=2, sample=0 + ) self.onlineSanity(model) def testPersistence(self): @@ -241,7 +245,9 @@ def testPersistenceWord2VecFormat(self): 
self.assertTrue(np.allclose(model.wv.syn0norm[model.wv.vocab['human'].index], norm_only_model['human'])) limited_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True, limit=3) self.assertEquals(len(limited_model_kv.syn0), 3) - half_precision_model_kv = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=True, datatype=np.float16) + half_precision_model_kv = keyedvectors.KeyedVectors.load_word2vec_format( + testfile(), binary=True, datatype=np.float16 + ) self.assertEquals(binary_model_kv.syn0.nbytes, half_precision_model_kv.syn0.nbytes * 2) def testNoTrainingCFormat(self): @@ -284,7 +290,9 @@ def testPersistenceWord2VecFormatNonBinary(self): norm_only_model = keyedvectors.KeyedVectors.load_word2vec_format(testfile(), binary=False) norm_only_model.init_sims(True) self.assertFalse(np.allclose(model['human'], norm_only_model['human'], atol=1e-6)) - self.assertTrue(np.allclose(model.wv.syn0norm[model.wv.vocab['human'].index], norm_only_model['human'], atol=1e-4)) + self.assertTrue(np.allclose( + model.wv.syn0norm[model.wv.vocab['human'].index], norm_only_model['human'], atol=1e-4 + )) def testPersistenceWord2VecFormatWithVocab(self): """Test storing/loading the entire model and vocabulary in word2vec format.""" @@ -450,14 +458,18 @@ def test_sg_neg(self): def test_cbow_hs(self): """Test CBOW w/ hierarchical softmax""" - model = word2vec.Word2Vec(sg=0, cbow_mean=1, alpha=0.05, window=8, hs=1, negative=0, - min_count=5, iter=10, workers=2, batch_words=1000) + model = word2vec.Word2Vec( + sg=0, cbow_mean=1, alpha=0.05, window=8, hs=1, negative=0, + min_count=5, iter=10, workers=2, batch_words=1000 + ) self.model_sanity(model) def test_cbow_neg(self): """Test CBOW w/ negative sampling""" - model = word2vec.Word2Vec(sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, - min_count=5, iter=10, workers=2, sample=0) + model = word2vec.Word2Vec( + sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, + min_count=5, iter=10, workers=2, sample=0 + ) self.model_sanity(model) def test_cosmul(self): @@ -654,8 +666,10 @@ def testBuildVocabWarning(self, l): @log_capture() def testTrainWarning(self, l): """Test if warning is raised if alpha rises during subsequent calls to train()""" - sentences = [['human'], - ['graph', 'trees']] + sentences = [ + ['human'], + ['graph', 'trees'] + ] model = word2vec.Word2Vec(min_count=1) model.build_vocab(sentences) for epoch in range(10): @@ -814,6 +828,7 @@ def assertLess(self, a, b, msg=None): if __name__ == '__main__': logging.basicConfig( format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', - level=logging.DEBUG) + level=logging.DEBUG + ) logging.info("using optimization %s", word2vec.FAST_VERSION) unittest.main() diff --git a/gensim/test/test_wordrank_wrapper.py b/gensim/test/test_wordrank_wrapper.py index 8f8d5b5f9d..4ecb9f7c70 100644 --- a/gensim/test/test_wordrank_wrapper.py +++ b/gensim/test/test_wordrank_wrapper.py @@ -36,7 +36,10 @@ def setUp(self): self.wr_file = datapath('test_glove.txt') if not self.wr_path: return - self.test_model = wordrank.Wordrank.train(self.wr_path, self.corpus_file, self.out_name, iter=6, dump_period=5, period=5, np=2, cleanup_files=True) + self.test_model = wordrank.Wordrank.train( + self.wr_path, self.corpus_file, self.out_name, iter=6, + dump_period=5, period=5, np=2, cleanup_files=True + ) def testLoadWordrankFormat(self): """Test model successfully loaded from Wordrank format file""" diff --git a/gensim/topic_coherence/direct_confirmation_measure.py 
b/gensim/topic_coherence/direct_confirmation_measure.py index 247d8c146d..a3b3463391 100644 --- a/gensim/topic_coherence/direct_confirmation_measure.py +++ b/gensim/topic_coherence/direct_confirmation_measure.py @@ -9,7 +9,6 @@ """ import logging - import numpy as np logger = logging.getLogger(__name__) @@ -38,8 +37,10 @@ def log_conditional_probability(segmented_topics, accumulator): for w_prime, w_star in s_i: w_star_count = accumulator[w_star] if w_star_count == 0: - raise ValueError("Topic with id %d not found in corpus used to compute coherence. " - "Try using a larger corpus with a smaller vocobulary and/or setting a smaller value of `topn` for `CoherenceModel`." % (w_star)) + raise ValueError( + "Topic with id %d not found in corpus used to compute coherence. " + "Try using a larger corpus with a smaller vocabulary and/or setting a smaller value of `topn` for `CoherenceModel`." % w_star + ) co_occur_count = accumulator[w_prime, w_star] m_lc_i = np.log(((co_occur_count / num_docs) + EPSILON) / (w_star_count / num_docs)) diff --git a/gensim/topic_coherence/probability_estimation.py b/gensim/topic_coherence/probability_estimation.py index 7832494a5c..1ddd70cbb0 100644 --- a/gensim/topic_coherence/probability_estimation.py +++ b/gensim/topic_coherence/probability_estimation.py @@ -11,8 +11,7 @@ import itertools import logging -from gensim.topic_coherence.text_analysis import \ - CorpusAccumulator, WordOccurrenceAccumulator, ParallelWordOccurrenceAccumulator +from gensim.topic_coherence.text_analysis import CorpusAccumulator, WordOccurrenceAccumulator, ParallelWordOccurrenceAccumulator logger = logging.getLogger(__name__) diff --git a/gensim/topic_coherence/segmentation.py b/gensim/topic_coherence/segmentation.py index 9097036914..2db0d695d2 100644 --- a/gensim/topic_coherence/segmentation.py +++ b/gensim/topic_coherence/segmentation.py @@ -27,18 +27,18 @@ def s_one_pre(topics): topics : list of topics obtained from an algorithm such as LDA. Is a list such as [array([ 9, 10, 11]), array([ 9, 10, 7]), ...] Returns: - s_one_pre : list of list of (W', W*) tuples for all unique topic ids + s_one_pre_res : list of list of (W', W*) tuples for all unique topic ids """ - s_one_pre = [] + s_one_pre_res = [] for top_words in topics: s_one_pre_t = [] for w_prime_index, w_prime in enumerate(top_words[1:]): for w_star in top_words[:w_prime_index + 1]: s_one_pre_t.append((w_prime, w_star)) - s_one_pre.append(s_one_pre_t) + s_one_pre_res.append(s_one_pre_t) - return s_one_pre + return s_one_pre_res def s_one_one(topics): @@ -55,9 +55,9 @@ def s_one_one(topics): topics : list of topics obtained from an algorithm such as LDA. Is a list such as [array([ 9, 10, 11]), array([ 9, 10, 7]), ...] Returns: - s_one_one : list of list of (W', W*) tuples for all unique topic ids + s_one_one_res : list of list of (W', W*) tuples for all unique topic ids """ - s_one_one = [] + s_one_one_res = [] for top_words in topics: s_one_one_t = [] @@ -67,9 +67,9 @@ def s_one_one(topics): continue else: s_one_one_t.append((w_prime, w_star)) - s_one_one.append(s_one_one_t) + s_one_one_res.append(s_one_one_t) - return s_one_one + return s_one_one_res def s_one_set(topics): @@ -87,14 +87,14 @@ def s_one_set(topics): topics : list of topics obtained from an algorithm such as LDA. Is a list such as [array([ 9, 10, 11]), array([ 9, 10, 7]), ...] Returns: - s_one_set : list of list of (W', W*) tuples for all unique topic ids. + s_one_set_res : list of list of (W', W*) tuples for all unique topic ids. 
""" - s_one_set = [] + s_one_set_res = [] for top_words in topics: s_one_set_t = [] for w_prime in top_words: s_one_set_t.append((w_prime, top_words)) - s_one_set.append(s_one_set_t) + s_one_set_res.append(s_one_set_t) - return s_one_set + return s_one_set_res diff --git a/gensim/topic_coherence/text_analysis.py b/gensim/topic_coherence/text_analysis.py index 7305fe9792..3254a34885 100644 --- a/gensim/topic_coherence/text_analysis.py +++ b/gensim/topic_coherence/text_analysis.py @@ -64,9 +64,7 @@ def num_docs(self): def num_docs(self, num): self._num_docs = num if self._num_docs % self.log_every == 0: - logger.info( - "%s accumulated stats from %d documents", - self.__class__.__name__, self._num_docs) + logger.info("%s accumulated stats from %d documents", self.__class__.__name__, self._num_docs) def analyze_text(self, text, doc_num=None): raise NotImplementedError("Base classes should implement analyze_text.") @@ -369,9 +367,7 @@ def queue_all_texts(self, q, texts, window_size): before = self._num_docs / self.log_every self._num_docs += sum(len(doc) - window_size + 1 for doc in batch) if before < (self._num_docs / self.log_every): - logger.info( - "%d batches submitted to accumulate stats from %d documents (%d virtual)", - (batch_num + 1), (batch_num + 1) * self.batch_size, self._num_docs) + logger.info("%d batches submitted to accumulate stats from %d documents (%d virtual)", (batch_num + 1), (batch_num + 1) * self.batch_size, self._num_docs) def terminate_workers(self, input_q, output_q, workers, interrupted=False): """Wait until all workers have transmitted their WordOccurrenceAccumulator instances, @@ -414,9 +410,7 @@ def merge_accumulators(self, accumulators): # Workers do partial accumulation, so none of the co-occurrence matrices are symmetrized. # This is by design, to avoid unnecessary matrix additions/conversions during accumulation. 
accumulator._symmetrize() - logger.info( - "accumulated word occurrence stats for %d virtual documents", - accumulator.num_docs) + logger.info("accumulated word occurrence stats for %d virtual documents", accumulator.num_docs) return accumulator @@ -435,9 +429,7 @@ def run(self): try: self._run() except KeyboardInterrupt: - logger.info( - "%s interrupted after processing %d documents", - self.__class__.__name__, self.accumulator.num_docs) + logger.info("%s interrupted after processing %d documents", self.__class__.__name__, self.accumulator.num_docs) except Exception: logger.exception("worker encountered unexpected exception") finally: @@ -455,13 +447,9 @@ def _run(self): self.accumulator.partial_accumulate(docs, self.window_size) n_docs += len(docs) - logger.debug( - "completed batch %d; %d documents processed (%d virtual)", - batch_num, n_docs, self.accumulator.num_docs) + logger.debug("completed batch %d; %d documents processed (%d virtual)", batch_num, n_docs, self.accumulator.num_docs) - logger.debug( - "finished all batches; %d documents processed (%d virtual)", - n_docs, self.accumulator.num_docs) + logger.debug("finished all batches; %d documents processed (%d virtual)", n_docs, self.accumulator.num_docs) def reply_to_master(self): logger.info("serializing accumulator to return to master...") diff --git a/gensim/utils.py b/gensim/utils.py index 47d7bc98cd..10555d2b51 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -106,12 +106,12 @@ def _synched(func): @wraps(func) def _synchronizer(self, *args, **kwargs): tlock = getattr(self, tlockname) - logger.debug("acquiring lock %r for %s" % (tlockname, func.__name__)) + logger.debug("acquiring lock %r for %s", tlockname, func.__name__) with tlock: # use lock as a context manager to perform safe acquire/release pairs - logger.debug("acquired lock %r for %s" % (tlockname, func.__name__)) + logger.debug("acquired lock %r for %s", tlockname, func.__name__) result = func(self, *args, **kwargs) - logger.debug("releasing lock %r for %s" % (tlockname, func.__name__)) + logger.debug("releasing lock %r for %s", tlockname, func.__name__) return result return _synchronizer return _synched @@ -181,8 +181,7 @@ def copytree_hardlink(source, dest): shutil.copy2 = copy2 -def tokenize(text, lowercase=False, deacc=False, encoding='utf8', errors="strict", to_lower=False, - lower=False): +def tokenize(text, lowercase=False, deacc=False, encoding='utf8', errors="strict", to_lower=False, lower=False): """ Iteratively yield tokens as unicode strings, removing accent marks and optionally lowercasing the unidoce string by assigning True @@ -275,7 +274,7 @@ def load(cls, fname, mmap=None): is encountered. """ - logger.info("loading %s object from %s" % (cls.__name__, fname)) + logger.info("loading %s object from %s", cls.__name__, fname) compress, subname = SaveLoad._adapt_by_suffix(fname) @@ -292,17 +291,16 @@ def _load_specials(self, fname, mmap, compress, subname): """ mmap_error = lambda x, y: IOError( 'Cannot mmap compressed object %s in file %s. ' % (x, y) + - 'Use `load(fname, mmap=None)` or uncompress files manually.') + 'Use `load(fname, mmap=None)` or uncompress files manually.' 
+ ) for attrib in getattr(self, '__recursive_saveloads', []): cfname = '.'.join((fname, attrib)) - logger.info("loading %s recursively from %s.* with mmap=%s" % ( - attrib, cfname, mmap)) + logger.info("loading %s recursively from %s.* with mmap=%s", attrib, cfname, mmap) getattr(self, attrib)._load_specials(cfname, mmap, compress, subname) for attrib in getattr(self, '__numpys', []): - logger.info("loading %s from %s with mmap=%s" % ( - attrib, subname(fname, attrib), mmap)) + logger.info("loading %s from %s with mmap=%s", attrib, subname(fname, attrib), mmap) if compress: if mmap: @@ -315,8 +313,7 @@ def _load_specials(self, fname, mmap, compress, subname): setattr(self, attrib, val) for attrib in getattr(self, '__scipys', []): - logger.info("loading %s from %s with mmap=%s" % ( - attrib, subname(fname, attrib), mmap)) + logger.info("loading %s from %s with mmap=%s", attrib, subname(fname, attrib), mmap) sparse = unpickle(subname(fname, attrib)) if compress: if mmap: @@ -334,7 +331,7 @@ def _load_specials(self, fname, mmap, compress, subname): setattr(self, attrib, sparse) for attrib in getattr(self, '__ignoreds', []): - logger.info("setting ignored attribute %s to None" % (attrib)) + logger.info("setting ignored attribute %s to None", attrib) setattr(self, attrib, None) @staticmethod @@ -346,10 +343,9 @@ def _adapt_by_suffix(fname): else: compress = False subname = lambda *args: '.'.join(list(args) + ['npy']) - return (compress, subname) + return compress, subname - def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, - ignore=frozenset(), pickle_protocol=2): + def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2): """ Save the object to file (also see `load`). @@ -370,9 +366,7 @@ def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, in both Python 2 and 3. 
""" - logger.info( - "saving %s object under %s, separately %s" % ( - self.__class__.__name__, fname, separately)) + logger.info("saving %s object under %s, separately %s", self.__class__.__name__, fname, separately) compress, subname = SaveLoad._adapt_by_suffix(fname) @@ -419,17 +413,14 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, if hasattr(val, '_save_specials'): # better than 'isinstance(val, SaveLoad)' if IPython reloading recursive_saveloads.append(attrib) cfname = '.'.join((fname, attrib)) - restores.extend(val._save_specials( - cfname, None, sep_limit, ignore, - pickle_protocol, compress, subname)) + restores.extend(val._save_specials(cfname, None, sep_limit, ignore, pickle_protocol, compress, subname)) try: numpys, scipys, ignoreds = [], [], [] for attrib, val in iteritems(asides): if isinstance(val, np.ndarray) and attrib not in ignore: numpys.append(attrib) - logger.info("storing np array '%s' to %s" % ( - attrib, subname(fname, attrib))) + logger.info("storing np array '%s' to %s", attrib, subname(fname, attrib)) if compress: np.savez_compressed(subname(fname, attrib), val=np.ascontiguousarray(val)) @@ -438,15 +429,15 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, elif isinstance(val, (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)) and attrib not in ignore: scipys.append(attrib) - logger.info("storing scipy.sparse array '%s' under %s" % ( - attrib, subname(fname, attrib))) + logger.info("storing scipy.sparse array '%s' under %s", attrib, subname(fname, attrib)) if compress: np.savez_compressed( subname(fname, attrib, 'sparse'), data=val.data, indptr=val.indptr, - indices=val.indices) + indices=val.indices + ) else: np.save(subname(fname, attrib, 'data'), val.data) np.save(subname(fname, attrib, 'indptr'), val.indptr) @@ -461,7 +452,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, finally: val.data, val.indptr, val.indices = data, indptr, indices else: - logger.info("not storing attribute %s" % (attrib)) + logger.info("not storing attribute %s", attrib) ignoreds.append(attrib) self.__dict__['__numpys'] = numpys @@ -475,8 +466,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, raise return restores + [(self, asides)] - def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, - ignore=frozenset(), pickle_protocol=2): + def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2): """ Save the object to file (also see `load`). @@ -504,11 +494,10 @@ def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, """ try: _pickle.dump(self, fname_or_handle, protocol=pickle_protocol) - logger.info("saved %s object" % self.__class__.__name__) + logger.info("saved %s object", self.__class__.__name__) except TypeError: # `fname_or_handle` does not have write attribute self._smart_save(fname_or_handle, separately, sep_limit, ignore, pickle_protocol=pickle_protocol) -# endclass SaveLoad def identity(p): @@ -864,10 +853,8 @@ def run(self): qsize = self.q.qsize() except NotImplementedError: qsize = '?' - logger.debug("prepared another chunk of %i documents (qsize=%s)" % - (len(wrapped_chunk[0]), qsize)) + logger.debug("prepared another chunk of %i documents (qsize=%s)", len(wrapped_chunk[0]), qsize) self.q.put(wrapped_chunk.pop(), block=True) -# endclass InputQueue if os.name == 'nt': @@ -957,7 +944,7 @@ def revdict(d): result (which one is kept is arbitrary). 
""" - return dict((v, k) for (k, v) in iteritems(dict(d))) + return {v: k for (k, v) in iteritems(dict(d))} def toptexts(query, texts, index, n=10): @@ -974,10 +961,7 @@ def toptexts(query, texts, index, n=10): sims = index[query] # perform a similarity query against the corpus sims = sorted(enumerate(sims), key=lambda item: -item[1]) - result = [] - for topid, topcosine in sims[:n]: # only consider top-n most similar docs - result.append((topid, topcosine, texts[topid])) - return result + return [(topid, topcosine, texts[topid]) for topid, topcosine in sims[:n]] # only consider top-n most similar docs def randfname(prefix='gensim'): @@ -997,7 +981,7 @@ def upload_chunked(server, docs, chunksize=1000, preprocess=None): start = 0 for chunk in grouper(docs, chunksize): end = start + len(chunk) - logger.info("uploading documents %i-%i" % (start, end - 1)) + logger.info("uploading documents %i-%i", start, end - 1) if preprocess is not None: pchunk = [] for doc in chunk: @@ -1039,7 +1023,7 @@ def pyro_daemon(name, obj, random_suffix=False, ip=None, port=None, ns_conf=None uri = daemon.register(obj, name) ns.remove(name) ns.register(name, uri) - logger.info("%s registered with nameserver (URI '%s')" % (name, uri)) + logger.info("%s registered with nameserver (URI '%s')", name, uri) daemon.requestLoop() @@ -1054,8 +1038,7 @@ def has_pattern(): return False -def lemmatize( - content, allowed_tags=re.compile('(NN|VB|JJ|RB)'), light=False, +def lemmatize(content, allowed_tags=re.compile('(NN|VB|JJ|RB)'), light=False, stopwords=frozenset(), min_length=2, max_length=15): """ This function is only available when the optional 'pattern' package is installed. @@ -1109,9 +1092,7 @@ def mock_data_row(dim=1000, prob_nnz=0.5, lam=1.0): """ nnz = np.random.uniform(size=(dim,)) - data = [(i, float(np.random.poisson(lam=lam) + 1.0)) - for i in xrange(dim) if nnz[i] < prob_nnz] - return data + return [(i, float(np.random.poisson(lam=lam) + 1.0)) for i in xrange(dim) if nnz[i] < prob_nnz] def mock_data(n_items=1000, dim=1000, prob_nnz=0.5, lam=1.0): @@ -1120,9 +1101,7 @@ def mock_data(n_items=1000, dim=1000, prob_nnz=0.5, lam=1.0): to be used as a mock corpus. """ - data = [mock_data_row(dim=dim, prob_nnz=prob_nnz, lam=lam) - for _ in xrange(n_items)] - return data + return [mock_data_row(dim=dim, prob_nnz=prob_nnz, lam=lam) for _ in xrange(n_items)] def prune_vocab(vocab, min_reduce, trim_rule=None): @@ -1138,8 +1117,7 @@ def prune_vocab(vocab, min_reduce, trim_rule=None): if not keep_vocab_item(w, vocab[w], min_reduce, trim_rule): # vocab[w] <= min_reduce: result += vocab[w] del vocab[w] - logger.info("pruned out %i tokens with count <=%i (before %i, after %i)", - old_len - len(vocab), min_reduce, old_len, len(vocab)) + logger.info("pruned out %i tokens with count <=%i (before %i, after %i)", old_len - len(vocab), min_reduce, old_len, len(vocab)) return result diff --git a/setup.py b/setup.py index afeff174cd..90fd484c13 100644 --- a/setup.py +++ b/setup.py @@ -107,7 +107,7 @@ def finalize_options(self): cmdclass = {'build_ext': custom_build_ext} -WHEELHOUSE_UPLOADER_COMMANDS = set(['fetch_artifacts', 'upload_all']) +WHEELHOUSE_UPLOADER_COMMANDS = {'fetch_artifacts', 'upload_all'} if WHEELHOUSE_UPLOADER_COMMANDS.intersection(sys.argv): import wheelhouse_uploader.cmd cmdclass.update(vars(wheelhouse_uploader.cmd))