diff --git a/docs/notebooks/sklearn_api.ipynb b/docs/notebooks/sklearn_api.ipynb index 3273b70728..b0e036747c 100644 --- a/docs/notebooks/sklearn_api.ipynb +++ b/docs/notebooks/sklearn_api.ipynb @@ -19,13 +19,25 @@ "metadata": {}, "source": [ "The wrappers available (as of now) are :\n", - "* LdaModel (```gensim.sklearn_api.ldamodel.LdaTransformer```),which implements gensim's ```LDA Model``` in a scikit-learn interface\n", + "* LdaModel (```gensim.sklearn_api.ldamodel.LdaTransformer```), which implements gensim's ```LDA Model``` in a scikit-learn interface\n", "\n", - "* LsiModel (```gensim.sklearn_api.lsimodel.LsiTransformer```),which implements gensim's ```LSI Model``` in a scikit-learn interface\n", + "* LsiModel (```gensim.sklearn_api.lsimodel.LsiTransformer```), which implements gensim's ```LSI Model``` in a scikit-learn interface\n", "\n", - "* RpModel (```gensim.sklearn_api.rpmodel.RpTransformer```),which implements gensim's ```Random Projections Model``` in a scikit-learn interface\n", + "* RpModel (```gensim.sklearn_api.rpmodel.RpTransformer```), which implements gensim's ```Random Projections Model``` in a scikit-learn interface\n", "\n", - "* LDASeq Model (```gensim.sklearn_api.ldaseqmodel.LdaSeqTransformer```),which implements gensim's ```LdaSeqModel``` in a scikit-learn interface" + "* LDASeq Model (```gensim.sklearn_api.ldaseqmodel.LdaSeqTransformer```), which implements gensim's ```LdaSeqModel``` in a scikit-learn interface\n", + "\n", + "* Word2Vec Model (```gensim.sklearn_api.w2vmodel.W2VTransformer```), which implements gensim's ```Word2Vec``` in a scikit-learn interface\n", + "\n", + "* AuthorTopic Model (```gensim.sklearn_api.atmodel.AuthorTopicTransformer```), which implements gensim's ```AuthorTopicModel``` in a scikit-learn interface\n", + "\n", + "* Doc2Vec Model (```gensim.sklearn_api.d2vmodel.D2VTransformer```), which implements gensim's ```Doc2Vec``` in a scikit-learn interface\n", + "\n", + "* Text2Bow Model (```gensim.sklearn_api.text2bow.Text2BowTransformer```), which implements gensim's ```Dictionary``` in a scikit-learn interface\n", + "\n", + "* TfIdf Model (```gensim.sklearn_api.tfidf.TfIdfTransformer```), which implements gensim's ```TfidfModel``` in a scikit-learn interface\n", + "\n", + "* HDP Model (```gensim.sklearn_api.hdp.HdpTransformer```), which implements gensim's ```HdpModel``` in a scikit-learn interface" ] }, { @@ -145,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -160,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "collapsed": true }, @@ -180,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "collapsed": true }, @@ -200,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "collapsed": true }, @@ -314,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "metadata": { "collapsed": true }, @@ -333,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "metadata": { "collapsed": true }, @@ -345,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -364,9 +376,10 @@ "source": [ "model = LdaTransformer(num_topics=15, id2word=id2word, iterations=10, random_state=37)\n", "clf = linear_model.LogisticRegression(penalty='l2', C=0.1) # l2 penalty used\n", - "pipe = 
Pipeline((('features', model,), ('classifier', clf)))\n", + "pipe = Pipeline([('features', model,), ('classifier', clf)])\n", "pipe.fit(corpus, data.target)\n", "print_features_pipe(pipe, id2word.values())\n", + "\n", "print(pipe.score(corpus, data.target))" ] }, @@ -386,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "metadata": { "collapsed": true }, @@ -404,18 +417,18 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[ 0.13653967 -0.00378269 0.02652037 0.08496786 -0.02401959 -0.60089273\n", - " -1.0708177 -0.03932274 -0.43813039 -0.54848409 -0.20147759 0.21781259\n", - " 1.30378972 -0.08678691 -0.17529122]\n", - "Positive features: internet...:1.30 trawling:0.22 Fame.:0.14 Keach:0.08 Fame,:0.03\n", - "Negative features: 01101001B:-1.07 comp.org.eff.talk.:-0.60 red@redpoll.neoucom.edu:-0.55 circuitry:-0.44 hanging:-0.20 >Pat:-0.18 dome.:-0.09 *best*:-0.04 comp.org.eff.talk,:-0.02 considered,:-0.00\n", + "[ 0.13655775 0.00381287 0.02643593 -0.08499907 -0.02387209 0.6004697\n", + " 1.07090198 0.03926809 0.43769831 0.54886088 -0.20186911 -0.21785685\n", + " 1.30488175 0.08663351 0.17558704]\n", + "Positive features: internet...:1.30 01101001B:1.07 comp.org.eff.talk.:0.60 red@redpoll.neoucom.edu:0.55 circuitry:0.44 >Pat:0.18 Fame.:0.14 dome.:0.09 *best*:0.04 Fame,:0.03\n", + "Negative features: trawling:-0.22 hanging:-0.20 Keach:-0.08 comp.org.eff.talk,:-0.02\n", "0.865771812081\n" ] } @@ -423,9 +436,10 @@ "source": [ "model = LsiTransformer(num_topics=15, id2word=id2word)\n", "clf = linear_model.LogisticRegression(penalty='l2', C=0.1) # l2 penalty used\n", - "pipe = Pipeline((('features', model,), ('classifier', clf)))\n", + "pipe = Pipeline([('features', model,), ('classifier', clf)])\n", "pipe.fit(corpus, data.target)\n", "print_features_pipe(pipe, id2word.values())\n", + "\n", "print(pipe.score(corpus, data.target))" ] }, @@ -445,7 +459,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "metadata": { "collapsed": true }, @@ -463,17 +477,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[-0.03186506 -0.00872616]\n", - "Positive features: \n", - "Negative features: Fame.:-0.03 considered,:-0.01\n", - "0.621644295302\n" + "[-0.01217523 0.0109422 ]\n", + "Positive features: considered,:0.01\n", + "Negative features: Fame.:-0.01\n", + "0.604865771812\n" ] } ], @@ -481,9 +495,10 @@ "model = RpTransformer(num_topics=2)\n", "np.random.mtrand.RandomState(1) # set seed for getting same result\n", "clf = linear_model.LogisticRegression(penalty='l2', C=0.1) # l2 penalty used\n", - "pipe = Pipeline((('features', model,), ('classifier', clf)))\n", + "pipe = Pipeline([('features', model,), ('classifier', clf)])\n", "pipe.fit(corpus, data.target)\n", "print_features_pipe(pipe, id2word.values())\n", + "\n", "print(pipe.score(corpus, data.target))" ] }, @@ -503,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": { "collapsed": true }, @@ -521,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -546,15 +561,411 @@ "source": [ "test_data = data.data[0:2]\n", "test_target = data.target[0:2]\n", - "id2word = Dictionary(map(lambda x: x.split(), test_data))\n", - "corpus = 
[id2word.doc2bow(i.split()) for i in test_data]\n", + "id2word_ldaseq = Dictionary(map(lambda x: x.split(), test_data))\n", + "corpus_ldaseq = [id2word_ldaseq.doc2bow(i.split()) for i in test_data]\n", "\n", - "model = LdaSeqTransformer(id2word=id2word, num_topics=2, time_slice=[1, 1, 1], initialize='gensim')\n", + "model = LdaSeqTransformer(id2word=id2word_ldaseq, num_topics=2, time_slice=[1, 1, 1], initialize='gensim')\n", "clf = linear_model.LogisticRegression(penalty='l2', C=0.1) # l2 penalty used\n", - "pipe = Pipeline((('features', model,), ('classifier', clf)))\n", - "pipe.fit(corpus, test_target)\n", - "print_features_pipe(pipe, id2word.values())\n", - "print(pipe.score(corpus, test_target))" + "pipe = Pipeline([('features', model,), ('classifier', clf)])\n", + "pipe.fit(corpus_ldaseq, test_target)\n", + "print_features_pipe(pipe, id2word_ldaseq.values())\n", + "\n", + "print(pipe.score(corpus_ldaseq, test_target))" ] }, { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "### Word2Vec Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the Word2Vec model, begin by importing the Word2Vec wrapper" ] }, { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from gensim.sklearn_api import W2VTransformer" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of Using Pipeline" ] }, { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7\n" + ] + } + ], + "source": [ + "w2v_texts = [\n", + " ['calculus', 'is', 'the', 'mathematical', 'study', 'of', 'continuous', 'change'],\n", + " ['geometry', 'is', 'the', 'study', 'of', 'shape'],\n", + " ['algebra', 'is', 'the', 'study', 'of', 'generalizations', 'of', 'arithmetic', 'operations'],\n", + " ['differential', 'calculus', 'is', 'related', 'to', 'rates', 'of', 'change', 'and', 'slopes', 'of', 'curves'],\n", + " ['integral', 'calculus', 'is', 'realted', 'to', 'accumulation', 'of', 'quantities', 'and', 'the', 'areas', 'under', 'and', 'between', 'curves'],\n", + " ['physics', 'is', 'the', 'natural', 'science', 'that', 'involves', 'the', 'study', 'of', 'matter', 'and', 'its', 'motion', 'and', 'behavior', 'through', 'space', 'and', 'time'],\n", + " ['the', 'main', 'goal', 'of', 'physics', 'is', 'to', 'understand', 'how', 'the', 'universe', 'behaves'],\n", + " ['physics', 'also', 'makes', 'significant', 'contributions', 'through', 'advances', 'in', 'new', 'technologies', 'that', 'arise', 'from', 'theoretical', 'breakthroughs'],\n", + " ['advances', 'in', 'the', 'understanding', 'of', 'electromagnetism', 'or', 'nuclear', 'physics', 'led', 'directly', 'to', 'the', 'development', 'of', 'new', 'products', 'that', 'have', 'dramatically', 'transformed', 'modern', 'day', 'society']\n", + "]\n", + "\n", + "model = W2VTransformer(size=10, min_count=1)\n", + "model.fit(w2v_texts)\n", + "\n", + "class_dict = {'mathematics': 1, 'physics': 0}\n", + "train_data = [\n", + " ('calculus', 'mathematics'), ('mathematical', 'mathematics'), ('geometry', 'mathematics'), ('operations', 'mathematics'), ('curves', 'mathematics'),\n", + " ('natural', 'physics'), ('nuclear', 'physics'), ('science', 'physics'), ('electromagnetism', 'physics'), ('natural', 'physics')\n", + "]\n", + "\n", + "train_input = list(map(lambda x: x[0], train_data))\n", + "train_target = list(map(lambda x: class_dict[x[1]], 
train_data))\n", + "\n", + "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)\n", + "clf.fit(model.transform(train_input), train_target)\n", + "text_w2v = Pipeline([('features', model,), ('classifier', clf)])\n", + "score = text_w2v.score(train_input, train_target)\n", + "\n", + "print(score)" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### AuthorTopic Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the AuthorTopic model, begin by importing the AuthorTopic wrapper" ] }, { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from gensim.sklearn_api import AuthorTopicTransformer" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of Using Pipeline" ] }, { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 0 0]\n" + ] + } + ], + "source": [ + "from sklearn import cluster\n", + "\n", + "atm_texts = [\n", + " ['complier', 'system', 'computer'],\n", + " ['eulerian', 'node', 'cycle', 'graph', 'tree', 'path'],\n", + " ['graph', 'flow', 'network', 'graph'],\n", + " ['loading', 'computer', 'system'],\n", + " ['user', 'server', 'system'],\n", + " ['tree', 'hamiltonian'],\n", + " ['graph', 'trees'],\n", + " ['computer', 'kernel', 'malfunction', 'computer'],\n", + " ['server', 'system', 'computer'],\n", + "]\n", + "atm_dictionary = Dictionary(atm_texts)\n", + "atm_corpus = [atm_dictionary.doc2bow(text) for text in atm_texts]\n", + "author2doc = {'john': [0, 1, 2, 3, 4, 5, 6], 'jane': [2, 3, 4, 5, 6, 7, 8], 'jack': [0, 2, 4, 6, 8], 'jill': [1, 3, 5, 7]}\n", + "\n", + "model = AuthorTopicTransformer(id2word=atm_dictionary, author2doc=author2doc, num_topics=10, passes=100)\n", + "model.fit(atm_corpus)\n", + "\n", + "# create and train clustering model\n", + "clstr = cluster.MiniBatchKMeans(n_clusters=2)\n", + "authors_full = ['john', 'jane', 'jack', 'jill']\n", + "clstr.fit(model.transform(authors_full))\n", + "\n", + "# stack together the two models in a pipeline\n", + "text_atm = Pipeline([('features', model,), ('cluster', clstr)])\n", + "author_list = ['jane', 'jack', 'jill']\n", + "ret_val = text_atm.predict(author_list)\n", + "\n", + "print(ret_val)" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Doc2Vec Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the Doc2Vec model, begin by importing the Doc2Vec wrapper" ] }, { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from gensim.sklearn_api import D2VTransformer" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of Using Pipeline" ] }, { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n" + ] + } + ], + "source": [ + "from gensim.models import doc2vec\n", + "d2v_sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(w2v_texts)]\n", + "\n", + "model = D2VTransformer(min_count=1)\n", + "model.fit(d2v_sentences)\n", + "\n", + "class_dict = {'mathematics': 1, 'physics': 0}\n", + "train_data = [\n", + " (['calculus', 'mathematical'], 'mathematics'), (['geometry', 'operations', 'curves'], 'mathematics'),\n", + " (['natural', 'nuclear'], 'physics'), (['science', 
'electromagnetism', 'natural'], 'physics')\n", + "]\n", + "train_input = list(map(lambda x: x[0], train_data))\n", + "train_target = list(map(lambda x: class_dict[x[1]], train_data))\n", + "\n", + "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)\n", + "clf.fit(model.transform(train_input), train_target)\n", + "text_d2v = Pipeline([('features', model,), ('classifier', clf)])\n", + "score = text_d2v.score(train_input, train_target)\n", + "\n", + "print(score)" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Text2Bow Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the Text2Bow model, begin by importing the Text2Bow wrapper" ] }, { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from gensim.sklearn_api import Text2BowTransformer" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of Using Pipeline" ] }, { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.947147651007\n" + ] + } + ], + "source": [ + "text2bow_model = Text2BowTransformer()\n", + "lda_model = LdaTransformer(num_topics=2, passes=10, minimum_probability=0, random_state=np.random.seed(0))\n", + "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)\n", + "text_t2b = Pipeline([('bow_model', text2bow_model), ('ldamodel', lda_model), ('classifier', clf)])\n", + "text_t2b.fit(data.data, data.target)\n", + "score = text_t2b.score(data.data, data.target)\n", + "\n", + "print(score)" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### TfIdf Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the TfIdf model, begin by importing the TfIdf wrapper" ] }, { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from gensim.sklearn_api import TfIdfTransformer" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of Using Pipeline" ] }, { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.578859060403\n" + ] + } + ], + "source": [ + "tfidf_model = TfIdfTransformer()\n", + "tfidf_model.fit(corpus)\n", + "lda_model = LdaTransformer(num_topics=2, passes=10, minimum_probability=0, random_state=np.random.seed(0))\n", + "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)\n", + "text_tfidf = Pipeline([('tfidf_model', tfidf_model), ('ldamodel', lda_model), ('classifier', clf)])\n", + "text_tfidf.fit(corpus, data.target)\n", + "score = text_tfidf.score(corpus, data.target)\n", + "\n", + "print(score)" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### HDP Model" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use the HDP model, begin by importing the HDP wrapper" ] }, { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from gensim.sklearn_api import HdpTransformer" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example of Using Pipeline" ] }, { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.848154362416\n" + ] + } + ], 
+ "source": [ + "model = HdpTransformer(id2word=id2word)\n", + "clf = linear_model.LogisticRegression(penalty='l2', C=0.1)\n", + "text_hdp = Pipeline([('features', model,), ('classifier', clf)])\n", + "text_hdp.fit(corpus, data.target)\n", + "score = text_hdp.score(corpus, data.target)\n", + "\n", + "print(score)" ] }, { diff --git a/gensim/sklearn_api/__init__.py b/gensim/sklearn_api/__init__.py index eb3e652fed..570fc7e875 100644 --- a/gensim/sklearn_api/__init__.py +++ b/gensim/sklearn_api/__init__.py @@ -17,3 +17,8 @@ from .ldaseqmodel import LdaSeqTransformer # noqa: F401 from .w2vmodel import W2VTransformer # noqa: F401 from .atmodel import AuthorTopicTransformer # noqa: F401 +from .d2vmodel import D2VTransformer # noqa: F401 +from .text2bow import Text2BowTransformer # noqa: F401 +from .tfidf import TfIdfTransformer # noqa: F401 +from .hdp import HdpTransformer # noqa: F401 +from .phrases import PhrasesTransformer # noqa: F401 diff --git a/gensim/sklearn_api/d2vmodel.py b/gensim/sklearn_api/d2vmodel.py new file mode 100644 index 0000000000..05d496d9b1 --- /dev/null +++ b/gensim/sklearn_api/d2vmodel.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Scikit learn interface for gensim for easy use of gensim with scikit-learn +Follows scikit-learn API conventions +""" + +import numpy as np +from six import string_types +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.exceptions import NotFittedError + +from gensim import models + + +class D2VTransformer(TransformerMixin, BaseEstimator): + """ + Base Doc2Vec module + """ + + def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, + dm_tag_count=1, docvecs=None, docvecs_mapfile=None, + comment=None, trim_rule=None, size=100, alpha=0.025, + window=5, min_count=5, max_vocab_size=None, sample=1e-3, + seed=1, workers=3, min_alpha=0.0001, hs=0, negative=5, + cbow_mean=1, hashfxn=hash, iter=5, sorted_vocab=1, + batch_words=10000): + """ + Sklearn api for Doc2Vec model. See gensim.models.Doc2Vec and gensim.models.Word2Vec for parameter details. + """ + self.gensim_model = None + self.dm_mean = dm_mean + self.dm = dm + self.dbow_words = dbow_words + self.dm_concat = dm_concat + self.dm_tag_count = dm_tag_count + self.docvecs = docvecs + self.docvecs_mapfile = docvecs_mapfile + self.comment = comment + self.trim_rule = trim_rule + + # attributes associated with gensim.models.Word2Vec + self.size = size + self.alpha = alpha + self.window = window + self.min_count = min_count + self.max_vocab_size = max_vocab_size + self.sample = sample + self.seed = seed + self.workers = workers + self.min_alpha = min_alpha + self.hs = hs + self.negative = negative + self.cbow_mean = int(cbow_mean) + self.hashfxn = hashfxn + self.iter = iter + self.sorted_vocab = sorted_vocab + self.batch_words = batch_words + + def fit(self, X, y=None): + """ + Fit the model according to the given training data. 
+ Calls gensim.models.Doc2Vec + """ + self.gensim_model = models.Doc2Vec(documents=X, dm_mean=self.dm_mean, dm=self.dm, + dbow_words=self.dbow_words, dm_concat=self.dm_concat, dm_tag_count=self.dm_tag_count, + docvecs=self.docvecs, docvecs_mapfile=self.docvecs_mapfile, comment=self.comment, + trim_rule=self.trim_rule, size=self.size, alpha=self.alpha, window=self.window, + min_count=self.min_count, max_vocab_size=self.max_vocab_size, sample=self.sample, + seed=self.seed, workers=self.workers, min_alpha=self.min_alpha, hs=self.hs, + negative=self.negative, cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, + iter=self.iter, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words) + return self + + def transform(self, docs): + """ + Return the vector representations for the input documents. + The input `docs` should be a list of lists like: [ ['calculus', 'mathematical'], ['geometry', 'operations', 'curves'] ] + or a single document like: ['calculus', 'mathematical'] + """ + if self.gensim_model is None: + raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + + # accept a single document (list of words) as well as a list of documents + check = lambda x: [x] if isinstance(x[0], string_types) else x + docs = check(docs) + X = [[] for _ in range(0, len(docs))] + + for k, v in enumerate(docs): + doc_vec = self.gensim_model.infer_vector(v) + X[k] = doc_vec + + return np.reshape(np.array(X), (len(docs), self.gensim_model.vector_size)) diff --git a/gensim/sklearn_api/hdp.py b/gensim/sklearn_api/hdp.py new file mode 100644 index 0000000000..92265a5e8f --- /dev/null +++ b/gensim/sklearn_api/hdp.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Scikit learn interface for gensim for easy use of gensim with scikit-learn +Follows scikit-learn API conventions +""" + +import numpy as np +from scipy import sparse +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.exceptions import NotFittedError + +from gensim import models +from gensim import matutils + + +class HdpTransformer(TransformerMixin, BaseEstimator): + """ + Base HDP module + """ + + def __init__(self, id2word, max_chunks=None, max_time=None, + chunksize=256, kappa=1.0, tau=64.0, K=15, T=150, alpha=1, + gamma=1, eta=0.01, scale=1.0, var_converge=0.0001, + outputdir=None, random_state=None): + """ + Sklearn api for HDP model. See gensim.models.HdpModel for parameter details. + """ + self.gensim_model = None + self.id2word = id2word + self.max_chunks = max_chunks + self.max_time = max_time + self.chunksize = chunksize + self.kappa = kappa + self.tau = tau + self.K = K + self.T = T + self.alpha = alpha + self.gamma = gamma + self.eta = eta + self.scale = scale + self.var_converge = var_converge + self.outputdir = outputdir + self.random_state = random_state + + def fit(self, X, y=None): + """ + Fit the model according to the given training data. 
+ Calls gensim.models.HdpModel + """ + if sparse.issparse(X): + corpus = matutils.Sparse2Corpus(X) + else: + corpus = X + + self.gensim_model = models.HdpModel(corpus=corpus, id2word=self.id2word, max_chunks=self.max_chunks, + max_time=self.max_time, chunksize=self.chunksize, kappa=self.kappa, tau=self.tau, + K=self.K, T=self.T, alpha=self.alpha, gamma=self.gamma, eta=self.eta, scale=self.scale, + var_converge=self.var_converge, outputdir=self.outputdir, random_state=self.random_state) + return self + + def transform(self, docs): + """ + Takes a list of documents in BOW format as input ('docs'). + Returns a matrix of topic distributions for the given documents, where entry (i, j) + is the probability of topic j in document i. + The input `docs` can be a list of documents like: [ [(4, 1), (7, 1)], [(9, 1), (13, 1)], [(2, 1), (6, 1)] ] + or a single document like: [(4, 1), (7, 1)] + """ + if self.gensim_model is None: + raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + + # accept a single document as well as a list of documents + check = lambda x: [x] if isinstance(x[0], tuple) else x + docs = check(docs) + X = [[] for _ in range(0, len(docs))] + + max_num_topics = 0 + for k, v in enumerate(docs): + X[k] = self.gensim_model[v] + max_num_topics = max(max_num_topics, max(list(map(lambda x: x[0], X[k]))) + 1) + + for k, v in enumerate(X): + # returning dense representation for compatibility with sklearn but we should go back to sparse representation in the future + dense_vec = matutils.sparse2full(v, max_num_topics) + X[k] = dense_vec + + return np.reshape(np.array(X), (len(docs), max_num_topics)) + + def partial_fit(self, X): + """ + Train model over X. + """ + if sparse.issparse(X): + X = matutils.Sparse2Corpus(X) + + if self.gensim_model is None: + self.gensim_model = models.HdpModel(id2word=self.id2word, max_chunks=self.max_chunks, + max_time=self.max_time, chunksize=self.chunksize, kappa=self.kappa, tau=self.tau, + K=self.K, T=self.T, alpha=self.alpha, gamma=self.gamma, eta=self.eta, scale=self.scale, + var_converge=self.var_converge, outputdir=self.outputdir, random_state=self.random_state) + + self.gensim_model.update(corpus=X) + return self diff --git a/gensim/sklearn_api/phrases.py b/gensim/sklearn_api/phrases.py new file mode 100644 index 0000000000..8a944f0235 --- /dev/null +++ b/gensim/sklearn_api/phrases.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Scikit learn interface for gensim for easy use of gensim with scikit-learn +Follows scikit-learn API conventions +""" + +from six import string_types +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.exceptions import NotFittedError + +from gensim import models + + +class PhrasesTransformer(TransformerMixin, BaseEstimator): + """ + Base Phrases module + """ + + def __init__(self, min_count=5, threshold=10.0, max_vocab_size=40000000, + delimiter=b'_', progress_per=10000): + """ + Sklearn wrapper for Phrases model. + """ + self.gensim_model = None + self.min_count = min_count + self.threshold = threshold + self.max_vocab_size = max_vocab_size + self.delimiter = delimiter + self.progress_per = progress_per + + def fit(self, X, y=None): + """ + Fit the model according to the given training data. 
+ """ + self.gensim_model = models.Phrases(sentences=X, min_count=self.min_count, threshold=self.threshold, + max_vocab_size=self.max_vocab_size, delimiter=self.delimiter, progress_per=self.progress_per) + return self + + def transform(self, docs): + """ + Return the input documents to return phrase tokens. + """ + if self.gensim_model is None: + raise NotFittedError("This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method.") + + # input as python lists + check = lambda x: [x] if isinstance(x[0], string_types) else x + docs = check(docs) + X = [[] for _ in range(0, len(docs))] + + for k, v in enumerate(docs): + phrase_tokens = self.gensim_model[v] + X[k] = phrase_tokens + + return X + + def partial_fit(self, X): + if self.gensim_model is None: + self.gensim_model = models.Phrases(sentences=X, min_count=self.min_count, threshold=self.threshold, + max_vocab_size=self.max_vocab_size, delimiter=self.delimiter, progress_per=self.progress_per) + + self.gensim_model.add_vocab(X) + return self diff --git a/gensim/sklearn_api/text2bow.py b/gensim/sklearn_api/text2bow.py new file mode 100644 index 0000000000..e5a96e6551 --- /dev/null +++ b/gensim/sklearn_api/text2bow.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Scikit learn interface for gensim for easy use of gensim with scikit-learn +Follows scikit-learn API conventions +""" + +from six import string_types +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.exceptions import NotFittedError + +from gensim.corpora import Dictionary +from gensim.utils import tokenize + + +class Text2BowTransformer(TransformerMixin, BaseEstimator): + """ + Base Text2Bow module + """ + + def __init__(self, prune_at=2000000, tokenizer=tokenize): + """ + Sklearn wrapper for Text2Bow model. + """ + self.gensim_model = None + self.prune_at = prune_at + self.tokenizer = tokenizer + + def fit(self, X, y=None): + """ + Fit the model according to the given training data. + """ + tokenized_docs = list(map(lambda x: list(self.tokenizer(x)), X)) + self.gensim_model = Dictionary(documents=tokenized_docs, prune_at=self.prune_at) + return self + + def transform(self, docs): + """ + Return the BOW format for the input documents. + """ + if self.gensim_model is None: + raise NotFittedError("This model has not been fitted yet. 
Call 'fit' with appropriate arguments before using this method.") + + # input as python lists + check = lambda x: [x] if isinstance(x, string_types) else x + docs = check(docs) + tokenized_docs = list(map(lambda x: list(self.tokenizer(x)), docs)) + X = [[] for _ in range(0, len(tokenized_docs))] + + for k, v in enumerate(tokenized_docs): + bow_val = self.gensim_model.doc2bow(v) + X[k] = bow_val + + return X + + def partial_fit(self, X): + if self.gensim_model is None: + self.gensim_model = Dictionary(prune_at=self.prune_at) + + tokenized_docs = list(map(lambda x: list(self.tokenizer(x)), X)) + self.gensim_model.add_documents(tokenized_docs) + return self diff --git a/gensim/sklearn_api/tfidf.py b/gensim/sklearn_api/tfidf.py new file mode 100644 index 0000000000..ca34af6b40 --- /dev/null +++ b/gensim/sklearn_api/tfidf.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Scikit learn interface for gensim for easy use of gensim with scikit-learn +Follows scikit-learn API conventions +""" + +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.exceptions import NotFittedError + +import gensim +from gensim.models import TfidfModel + + +class TfIdfTransformer(TransformerMixin, BaseEstimator): + """ + Base Tf-Idf module + """ + + def __init__(self, corpus=None, id2word=None, dictionary=None, + wlocal=gensim.utils.identity, wglobal=gensim.models.tfidfmodel.df2idf, normalize=True): + """ + Sklearn wrapper for Tf-Idf model. + """ + self.gensim_model = None + self.id2word = id2word + self.dictionary = dictionary + self.wlocal = wlocal + self.wglobal = wglobal + self.normalize = normalize + + def fit(self, X, y=None): + """ + Fit the model according to the given training data. + """ + self.gensim_model = TfidfModel(corpus=X, id2word=self.id2word, dictionary=self.dictionary, + wlocal=self.wlocal, wglobal=self.wglobal, normalize=self.normalize) + return self + + def transform(self, docs): + """ + Return the documents with their term weights transformed by the fitted tf-idf model. + """ + if self.gensim_model is None: + raise NotFittedError("This model has not been fitted yet. 
Call 'fit' with appropriate arguments before using this method.") + + # input as python lists + check = lambda x: [x] if isinstance(x[0], tuple) else x + docs = check(docs) + X = [[] for _ in range(0, len(docs))] + + for k, v in enumerate(docs): + transformed_doc = self.gensim_model[v] + X[k] = transformed_doc + + return X diff --git a/gensim/test/test_sklearn_api.py b/gensim/test/test_sklearn_api.py index a4529c3346..d990cc403e 100644 --- a/gensim/test/test_sklearn_api.py +++ b/gensim/test/test_sklearn_api.py @@ -18,9 +18,13 @@ from gensim.sklearn_api.ldaseqmodel import LdaSeqTransformer from gensim.sklearn_api.w2vmodel import W2VTransformer from gensim.sklearn_api.atmodel import AuthorTopicTransformer +from gensim.sklearn_api.d2vmodel import D2VTransformer +from gensim.sklearn_api.text2bow import Text2BowTransformer +from gensim.sklearn_api.tfidf import TfIdfTransformer +from gensim.sklearn_api.hdp import HdpTransformer +from gensim.sklearn_api.phrases import PhrasesTransformer from gensim.corpora import mmcorpus, Dictionary -from gensim import models -from gensim import matutils +from gensim import matutils, models module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) @@ -94,6 +98,33 @@ ['advances', 'in', 'the', 'understanding', 'of', 'electromagnetism', 'or', 'nuclear', 'physics', 'led', 'directly', 'to', 'the', 'development', 'of', 'new', 'products', 'that', 'have', 'dramatically', 'transformed', 'modern', 'day', 'society'] ] +d2v_sentences = [models.doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(w2v_texts)] + +dict_texts = [ + 'human interface computer', + 'survey user computer system response time', + 'eps user interface system', + 'system human system eps', + 'user response time', + 'trees', + 'graph trees', + 'graph minors trees', + 'graph minors survey' +] + +phrases_sentences = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'], + ['graph', 'minors', 'survey', 'human', 'interface'] +] + class TestLdaWrapper(unittest.TestCase): def setUp(self): @@ -163,7 +194,7 @@ def testPipeline(self): corpus = [id2word.doc2bow(i.split()) for i in data.data] numpy.random.mtrand.RandomState(1) # set seed for getting same result clf = linear_model.LogisticRegression(penalty='l2', C=0.1) - text_lda = Pipeline((('features', model,), ('classifier', clf))) + text_lda = Pipeline([('features', model,), ('classifier', clf)]) text_lda.fit(corpus, data.target) score = text_lda.score(corpus, data.target) self.assertGreater(score, 0.40) @@ -253,7 +284,7 @@ def testPipeline(self): corpus = [id2word.doc2bow(i.split()) for i in data.data] numpy.random.mtrand.RandomState(1) # set seed for getting same result clf = linear_model.LogisticRegression(penalty='l2', C=0.1) - text_lsi = Pipeline((('features', model,), ('classifier', clf))) + text_lsi = Pipeline([('features', model,), ('classifier', clf)]) text_lsi.fit(corpus, data.target) score = text_lsi.score(corpus, data.target) self.assertGreater(score, 0.50) @@ -336,7 +367,7 @@ def testPipeline(self): corpus = [id2word.doc2bow(i.split()) for i in test_data] model = LdaSeqTransformer(id2word=id2word, num_topics=2, time_slice=[1, 1, 1], initialize='gensim') clf = 
linear_model.LogisticRegression(penalty='l2', C=0.1) - text_ldaseq = Pipeline((('features', model,), ('classifier', clf))) + text_ldaseq = Pipeline([('features', model,), ('classifier', clf)]) text_ldaseq.fit(corpus, test_target) score = text_ldaseq.score(corpus, test_target) self.assertGreater(score, 0.50) @@ -406,7 +437,7 @@ def testPipeline(self): corpus = [id2word.doc2bow(i.split()) for i in data.data] numpy.random.mtrand.RandomState(1) # set seed for getting same result clf = linear_model.LogisticRegression(penalty='l2', C=0.1) - text_rp = Pipeline((('features', model,), ('classifier', clf))) + text_rp = Pipeline([('features', model,), ('classifier', clf)]) text_rp.fit(corpus, data.target) score = text_rp.score(corpus, data.target) self.assertGreater(score, 0.40) @@ -491,7 +522,7 @@ def testPipeline(self): clf = linear_model.LogisticRegression(penalty='l2', C=0.1) clf.fit(model.transform(train_input), train_target) - text_w2v = Pipeline((('features', model,), ('classifier', clf))) + text_w2v = Pipeline([('features', model,), ('classifier', clf)]) score = text_w2v.score(train_input, train_target) self.assertGreater(score, 0.40) @@ -562,7 +593,7 @@ def testPipeline(self): clstr.fit(model.transform(authors_full)) # stack together the two models in a pipeline - text_atm = Pipeline((('features', model,), ('cluster', clstr))) + text_atm = Pipeline([('features', model,), ('cluster', clstr)]) author_list = ['jane', 'jack', 'jill'] ret_val = text_atm.predict(author_list) self.assertEqual(len(ret_val), len(author_list)) @@ -609,5 +640,331 @@ def testModelNotFitted(self): self.assertRaises(NotFittedError, atmodel_wrapper.transform, author_list) +class TestD2VTransformer(unittest.TestCase): + def setUp(self): + numpy.random.seed(0) + self.model = D2VTransformer(min_count=1) + self.model.fit(d2v_sentences) + + def testTransform(self): + # transform multiple documents + docs = [] + docs.append(w2v_texts[0]) + docs.append(w2v_texts[1]) + docs.append(w2v_texts[2]) + matrix = self.model.transform(docs) + self.assertEqual(matrix.shape[0], 3) + self.assertEqual(matrix.shape[1], self.model.size) + + # transform one document + doc = w2v_texts[0] + matrix = self.model.transform(doc) + self.assertEqual(matrix.shape[0], 1) + self.assertEqual(matrix.shape[1], self.model.size) + + def testSetGetParams(self): + # updating only one param + self.model.set_params(negative=20) + model_params = self.model.get_params() + self.assertEqual(model_params["negative"], 20) + + # verify that the attributes values are also changed for `gensim_model` after fitting + self.model.fit(d2v_sentences) + self.assertEqual(getattr(self.model.gensim_model, 'negative'), 20) + + def testPipeline(self): + numpy.random.seed(0) # set fixed seed to get similar values every time + model = D2VTransformer(min_count=1) + model.fit(d2v_sentences) + + class_dict = {'mathematics': 1, 'physics': 0} + train_data = [ + (['calculus', 'mathematical'], 'mathematics'), (['geometry', 'operations', 'curves'], 'mathematics'), + (['natural', 'nuclear'], 'physics'), (['science', 'electromagnetism', 'natural'], 'physics') + ] + train_input = list(map(lambda x: x[0], train_data)) + train_target = list(map(lambda x: class_dict[x[1]], train_data)) + + clf = linear_model.LogisticRegression(penalty='l2', C=0.1) + clf.fit(model.transform(train_input), train_target) + text_w2v = Pipeline([('features', model,), ('classifier', clf)]) + score = text_w2v.score(train_input, train_target) + self.assertGreater(score, 0.40) + + def testPersistence(self): + model_dump = 
pickle.dumps(self.model) + model_load = pickle.loads(model_dump) + + doc = w2v_texts[0] + loaded_transformed_vecs = model_load.transform(doc) + + # sanity check for transformation operation + self.assertEqual(loaded_transformed_vecs.shape[0], 1) + self.assertEqual(loaded_transformed_vecs.shape[1], model_load.size) + + # comparing the original and loaded models + original_transformed_vecs = self.model.transform(doc) + passed = numpy.allclose(sorted(loaded_transformed_vecs), sorted(original_transformed_vecs), atol=1e-1) + self.assertTrue(passed) + + def testConsistencyWithGensimModel(self): + # training a D2VTransformer + self.model = D2VTransformer(min_count=1) + self.model.fit(d2v_sentences) + + # training a Gensim Doc2Vec model with the same params + gensim_d2vmodel = models.Doc2Vec(d2v_sentences, min_count=1) + + doc = w2v_texts[0] + vec_transformer_api = self.model.transform(doc) # vector returned by D2VTransformer + vec_gensim_model = gensim_d2vmodel[doc] # vector returned by Doc2Vec + passed = numpy.allclose(vec_transformer_api, vec_gensim_model, atol=1e-1) + self.assertTrue(passed) + + def testModelNotFitted(self): + d2vmodel_wrapper = D2VTransformer(min_count=1) + self.assertRaises(NotFittedError, d2vmodel_wrapper.transform, 1) + + +class TestText2BowTransformer(unittest.TestCase): + def setUp(self): + numpy.random.seed(0) + self.model = Text2BowTransformer() + self.model.fit(dict_texts) + + def testTransform(self): + # transform one document + doc = ['computer system interface time computer system'] + bow_vec = self.model.transform(doc)[0] + expected_values = [1, 1, 2, 2] # comparing only the word-counts + values = list(map(lambda x: x[1], bow_vec)) + self.assertEqual(sorted(expected_values), sorted(values)) + + def testSetGetParams(self): + # updating only one param + self.model.set_params(prune_at=1000000) + model_params = self.model.get_params() + self.assertEqual(model_params["prune_at"], 1000000) + + def testPipeline(self): + with open(datapath('mini_newsgroup'), 'rb') as f: + compressed_content = f.read() + uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') + cache = pickle.loads(uncompressed_content) + data = cache + text2bow_model = Text2BowTransformer() + lda_model = LdaTransformer(num_topics=2, passes=10, minimum_probability=0, random_state=numpy.random.seed(0)) + numpy.random.mtrand.RandomState(1) # set seed for getting same result + clf = linear_model.LogisticRegression(penalty='l2', C=0.1) + text_lda = Pipeline([('bow_model', text2bow_model), ('ldamodel', lda_model), ('classifier', clf)]) + text_lda.fit(data.data, data.target) + score = text_lda.score(data.data, data.target) + self.assertGreater(score, 0.40) + + def testPersistence(self): + model_dump = pickle.dumps(self.model) + model_load = pickle.loads(model_dump) + + doc = dict_texts[0] + loaded_transformed_vecs = model_load.transform(doc) + + # comparing the original and loaded models + original_transformed_vecs = self.model.transform(doc) + self.assertEqual(original_transformed_vecs, loaded_transformed_vecs) + + def testModelNotFitted(self): + text2bow_wrapper = Text2BowTransformer() + self.assertRaises(NotFittedError, text2bow_wrapper.transform, dict_texts[0]) + + +class TestTfIdfTransformer(unittest.TestCase): + def setUp(self): + numpy.random.seed(0) + self.model = TfIdfTransformer(normalize=True) + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + self.model.fit(self.corpus) + + def testTransform(self): + # transform one document + doc = corpus[0] + transformed_doc = 
self.model.transform(doc) + expected_doc = [[(0, 0.5773502691896257), (1, 0.5773502691896257), (2, 0.5773502691896257)]] + self.assertTrue(numpy.allclose(transformed_doc, expected_doc)) + + # transform multiple documents + docs = [corpus[0], corpus[1]] + transformed_docs = self.model.transform(docs) + expected_docs = [[(0, 0.5773502691896257), (1, 0.5773502691896257), (2, 0.5773502691896257)], + [(3, 0.44424552527467476), (4, 0.44424552527467476), (5, 0.3244870206138555), (6, 0.44424552527467476), (7, 0.3244870206138555), (8, 0.44424552527467476)]] + self.assertTrue(numpy.allclose(transformed_docs[0], expected_docs[0])) + self.assertTrue(numpy.allclose(transformed_docs[1], expected_docs[1])) + + def testSetGetParams(self): + # updating only one param + self.model.set_params(normalize=False) + model_params = self.model.get_params() + self.assertEqual(model_params["normalize"], False) + + # verify that the attributes values are also changed for `gensim_model` after fitting + self.model.fit(self.corpus) + self.assertEqual(getattr(self.model.gensim_model, 'normalize'), False) + + def testPipeline(self): + with open(datapath('mini_newsgroup'), 'rb') as f: + compressed_content = f.read() + uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') + cache = pickle.loads(uncompressed_content) + data = cache + id2word = Dictionary(map(lambda x: x.split(), data.data)) + corpus = [id2word.doc2bow(i.split()) for i in data.data] + tfidf_model = TfIdfTransformer() + tfidf_model.fit(corpus) + lda_model = LdaTransformer(num_topics=2, passes=10, minimum_probability=0, random_state=numpy.random.seed(0)) + numpy.random.mtrand.RandomState(1) # set seed for getting same result + clf = linear_model.LogisticRegression(penalty='l2', C=0.1) + text_tfidf = Pipeline([('tfidf_model', tfidf_model), ('ldamodel', lda_model), ('classifier', clf)]) + text_tfidf.fit(corpus, data.target) + score = text_tfidf.score(corpus, data.target) + self.assertGreater(score, 0.40) + + def testPersistence(self): + model_dump = pickle.dumps(self.model) + model_load = pickle.loads(model_dump) + + doc = corpus[0] + loaded_transformed_doc = model_load.transform(doc) + + # comparing the original and loaded models + original_transformed_doc = self.model.transform(doc) + self.assertEqual(original_transformed_doc, loaded_transformed_doc) + + def testModelNotFitted(self): + tfidf_wrapper = TfIdfTransformer() + self.assertRaises(NotFittedError, tfidf_wrapper.transform, corpus[0]) + + +class TestHdpTransformer(unittest.TestCase): + def setUp(self): + numpy.random.seed(0) + self.model = HdpTransformer(id2word=dictionary) + self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) + self.model.fit(self.corpus) + + def testTransform(self): + # transform one document + doc = self.corpus[0] + transformed_doc = self.model.transform(doc) + expected_doc = [[0.81043386270128193, 0.049357139518070477, 0.035840906753517532, 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148]] + self.assertTrue(numpy.allclose(transformed_doc, expected_doc)) + + # transform multiple documents + docs = [self.corpus[0], self.corpus[1]] + transformed_docs = self.model.transform(docs) + expected_docs = [[0.81043386270128193, 0.049357139518070477, 0.035840906753517532, 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148], + [0.0368655605, 0.709055041, 0.194436428, 0.0151706795, 0.0113863652, 1.00000000e-12, 1.00000000e-12]] + self.assertTrue(numpy.allclose(transformed_docs[0], expected_docs[0])) + 
self.assertTrue(numpy.allclose(transformed_docs[1], expected_docs[1])) + + def testPartialFit(self): + for i in range(10): + self.model.partial_fit(X=self.corpus) # fit the model again, incrementally + doc = list(self.corpus)[0] # transform only the first document + transformed = self.model.transform(doc) + expected = numpy.array([0.76777752, 0.01757334, 0.01600339, 0.01374061, 0.01275931, 0.01126313, 0.01058131, 0.01167185]) + passed = numpy.allclose(sorted(transformed[0]), sorted(expected), atol=1e-1) + self.assertTrue(passed) + + def testSetGetParams(self): + # updating only one param + self.model.set_params(var_converge=0.05) + model_params = self.model.get_params() + self.assertEqual(model_params["var_converge"], 0.05) + + # verify that the attributes values are also changed for `gensim_model` after fitting + self.model.fit(self.corpus) + self.assertEqual(getattr(self.model.gensim_model, 'm_var_converge'), 0.05) + + def testPipeline(self): + with open(datapath('mini_newsgroup'), 'rb') as f: + compressed_content = f.read() + uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') + cache = pickle.loads(uncompressed_content) + data = cache + id2word = Dictionary(map(lambda x: x.split(), data.data)) + corpus = [id2word.doc2bow(i.split()) for i in data.data] + model = HdpTransformer(id2word=id2word) + clf = linear_model.LogisticRegression(penalty='l2', C=0.1) + text_lda = Pipeline([('features', model,), ('classifier', clf)]) + text_lda.fit(corpus, data.target) + score = text_lda.score(corpus, data.target) + self.assertGreater(score, 0.40) + + def testPersistence(self): + model_dump = pickle.dumps(self.model) + model_load = pickle.loads(model_dump) + + doc = corpus[0] + loaded_transformed_doc = model_load.transform(doc) + + # comparing the original and loaded models + original_transformed_doc = self.model.transform(doc) + self.assertTrue(numpy.allclose(original_transformed_doc, loaded_transformed_doc)) + + def testModelNotFitted(self): + hdp_wrapper = HdpTransformer(id2word=dictionary) + self.assertRaises(NotFittedError, hdp_wrapper.transform, corpus[0]) + + +class TestPhrasesTransformer(unittest.TestCase): + def setUp(self): + numpy.random.seed(0) + self.model = PhrasesTransformer(min_count=1, threshold=1) + self.model.fit(phrases_sentences) + + def testTransform(self): + # transform one document + doc = phrases_sentences[-1] + phrase_tokens = self.model.transform(doc)[0] + expected_phrase_tokens = [u'graph_minors', u'survey', u'human_interface'] + self.assertEqual(phrase_tokens, expected_phrase_tokens) + + def testPartialFit(self): + new_sentences = [ + ['world', 'peace', 'humans', 'world', 'peace', 'world', 'peace', 'people'], + ['world', 'peace', 'people'], + ['world', 'peace', 'humans'] + ] + self.model.partial_fit(X=new_sentences) # train model with new sentences + + doc = ['graph', 'minors', 'survey', 'human', 'interface', 'world', 'peace'] + phrase_tokens = self.model.transform(doc)[0] + expected_phrase_tokens = [u'graph_minors', u'survey', u'human_interface', u'world_peace'] + self.assertEqual(phrase_tokens, expected_phrase_tokens) + + def testSetGetParams(self): + # updating only one param + self.model.set_params(progress_per=5000) + model_params = self.model.get_params() + self.assertEqual(model_params["progress_per"], 5000) + + # verify that the attributes values are also changed for `gensim_model` after fitting + self.model.fit(phrases_sentences) + self.assertEqual(getattr(self.model.gensim_model, 'progress_per'), 5000) + + def testPersistence(self): + model_dump = 
pickle.dumps(self.model) + model_load = pickle.loads(model_dump) + + doc = phrases_sentences[-1] + loaded_phrase_tokens = model_load.transform(doc) + + # comparing the original and loaded models + original_phrase_tokens = self.model.transform(doc) + self.assertEqual(original_phrase_tokens, loaded_phrase_tokens) + + def testModelNotFitted(self): + phrases_transformer = PhrasesTransformer() + self.assertRaises(NotFittedError, phrases_transformer.transform, phrases_sentences[0]) + if __name__ == '__main__': unittest.main() diff --git a/setup.py b/setup.py index 912fb1075a..6097e34c5a 100644 --- a/setup.py +++ b/setup.py @@ -229,7 +229,7 @@ def finalize_options(self): test_env = [ 'testfixtures', 'Morfessor == 2.0.2a4', - 'scikit-learn == 0.18.2', + 'scikit-learn', 'pyemd', 'annoy', 'tensorflow >= 1.1.0',
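Usage notes on the new wrappers follow; the sketches below are illustrative only, with made-up corpora and parameter values, and are not part of the diff. First, the recurring `Pipeline((...))` → `Pipeline([...])` change: scikit-learn documents `steps` as a *list* of `(name, estimator)` tuples, and versions newer than the previously pinned `scikit-learn == 0.18.2` are stricter about this, which is also why `setup.py` drops the exact pin. A minimal sketch of the expected construction (step names arbitrary):

```python
from sklearn.pipeline import Pipeline
from sklearn import linear_model
from gensim.sklearn_api import LdaTransformer

# `steps` should be a list of (name, estimator) tuples; the old
# tuple-of-tuples form is what this diff replaces in notebook and tests.
pipe = Pipeline([
    ('features', LdaTransformer(num_topics=2)),
    ('classifier', linear_model.LogisticRegression(penalty='l2', C=0.1)),
])
```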
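A minimal end-to-end sketch of the new `D2VTransformer` (toy corpus, illustrative values; `fit` expects `TaggedDocument`s, as in the notebook cell):

```python
from gensim.models import doc2vec
from gensim.sklearn_api import D2VTransformer

texts = [
    ['calculus', 'is', 'mathematics'],
    ['physics', 'is', 'natural', 'science'],
]
# Each training document must be tagged, exactly as in the notebook example.
sentences = [doc2vec.TaggedDocument(words, [i]) for i, words in enumerate(texts)]

model = D2VTransformer(size=10, min_count=1)
model.fit(sentences)

# transform accepts a list of token lists, or a single token list,
# and returns a dense matrix with one row per document.
vecs = model.transform([['calculus'], ['physics', 'science']])
print(vecs.shape)  # (2, 10)
```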
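`Text2BowTransformer` is what lets the Text2Bow notebook pipeline start from raw strings instead of pre-built bag-of-words lists; it tokenizes with `gensim.utils.tokenize` by default and builds a `Dictionary` on `fit`. A short sketch of the contract (toy strings):

```python
from gensim.sklearn_api import Text2BowTransformer

t2b = Text2BowTransformer()
t2b.fit(['human interface computer', 'computer system'])

# transform accepts a list of strings, or a single string, and returns
# gensim-style BoW lists of (token_id, count) tuples.
print(t2b.transform('computer interface computer')[0])
# e.g. [(0, 2), (2, 1)] -- the ids depend on the fitted dictionary
```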
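One asymmetry worth flagging: unlike `LdaTransformer` or `D2VTransformer`, which return dense numpy arrays, `TfIdfTransformer.transform` returns gensim-style sparse BoW lists. That is why both the notebook and the tests chain it into `LdaTransformer` before a classifier rather than feeding it to `LogisticRegression` directly. Sketch (assuming a BoW `corpus` as in the notebook):

```python
from gensim.sklearn_api import TfIdfTransformer

tfidf = TfIdfTransformer()
tfidf.fit(corpus)                      # corpus: a list of BoW documents
weighted = tfidf.transform(corpus[0])  # [[(term_id, weight), ...]] -- not a numpy array
```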
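`HdpTransformer.transform` densifies its output with `matutils.sparse2full`, padding each row to the highest topic id seen *in that batch*, so the number of columns can differ between calls on different inputs. The wrapper also supports incremental training via `partial_fit`. A hedged sketch (assuming `dictionary` and a BoW `corpus` as in the tests):

```python
from gensim.sklearn_api import HdpTransformer

hdp = HdpTransformer(id2word=dictionary)
# partial_fit creates the underlying HdpModel on first call, then updates it.
for chunk in (corpus[:5], corpus[5:]):
    hdp.partial_fit(chunk)

vecs = hdp.transform(corpus[:2])  # dense rows; width = max topic id in this batch + 1
```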
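`PhrasesTransformer` is exported from `gensim.sklearn_api` and covered by the new tests, but has no notebook cell yet; for completeness, a usage sketch (toy sentences; whether a bigram is merged depends on its score against `threshold`, so the shown output is indicative only):

```python
from gensim.sklearn_api import PhrasesTransformer

sentences = [
    ['graph', 'minors', 'survey'],
    ['graph', 'minors', 'trees'],
    ['graph', 'minors', 'survey', 'human', 'interface'],
]
phrases = PhrasesTransformer(min_count=1, threshold=1)
phrases.fit(sentences)

# Sufficiently frequent bigrams are merged with the '_' delimiter.
print(phrases.transform([['graph', 'minors', 'trees']])[0])
# e.g. ['graph_minors', 'trees']
```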
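Finally, because every wrapper subclasses `BaseEstimator` (the behaviour the `testSetGetParams` tests exercise), the standard scikit-learn model-selection machinery should compose with them. An illustrative, untested sketch with a toy grid, reusing `id2word`, `corpus` and `data` from the notebook:

```python
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn import linear_model
from gensim.sklearn_api import LdaTransformer

pipe = Pipeline([
    ('features', LdaTransformer(id2word=id2word, iterations=10)),
    ('classifier', linear_model.LogisticRegression(penalty='l2', C=0.1)),
])
# get_params()/set_params() from BaseEstimator are what make
# 'features__num_topics' addressable by the grid search.
search = GridSearchCV(pipe, param_grid={'features__num_topics': [5, 10, 15]}, cv=2)
search.fit(corpus, data.target)
print(search.best_params_)
```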