Changing file places

MartinKondor · Apr 1, 2019 · f501d08 · f501d08
1 parent 932e774
commit f501d08
Show file tree

Hide file tree

Showing 9 changed files with 67 additions and 22 deletions.
diff --git a/models/README.md b/models/README.md
@@ -0,0 +1 @@
+# Models
diff --git a/trainers/Myers–Briggs Type.ipynb → notebooks/Myers–Briggs Type.ipynb b/trainers/Myers–Briggs Type.ipynb → notebooks/Myers–Briggs Type.ipynb
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -126,41 +126,41 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>3395</th>\n",
-       "      <td>INFJ</td>\n",
-       "      <td>'If your boyfriend/girlfriend/spouse gets sick...</td>\n",
+       "      <th>7057</th>\n",
+       "      <td>ISTJ</td>\n",
+       "      <td>'At my work, passive-aggressive behavior is wh...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>6486</th>\n",
-       "      <td>INFP</td>\n",
-       "      <td>'So I made a blog... After 5 years of wanting ...</td>\n",
+       "      <th>3261</th>\n",
+       "      <td>INFJ</td>\n",
+       "      <td>'@Macrosapien  I agree with the victim part. W...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>4738</th>\n",
-       "      <td>INFP</td>\n",
-       "      <td>'http://i.imgur.com/gwbS9U2.jpg|||http://image...</td>\n",
+       "      <th>760</th>\n",
+       "      <td>ENTJ</td>\n",
+       "      <td>'6w7  http://youtu.be/jSWIUEV5sPQ|||sx/sp 5w4 ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>123</th>\n",
-       "      <td>ISTP</td>\n",
-       "      <td>'I don't offer advice unless it is asked for o...</td>\n",
+       "      <th>7346</th>\n",
+       "      <td>ISFP</td>\n",
+       "      <td>'I'm still here when you mention me by name! L...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>7176</th>\n",
-       "      <td>ISTP</td>\n",
-       "      <td>'Well that was pretty clear. Nice try by mcGre...</td>\n",
+       "      <th>8522</th>\n",
+       "      <td>ISFP</td>\n",
+       "      <td>I've spent years trying to learn how to be pro...</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
        "      type                                              posts\n",
-       "3395  INFJ  'If your boyfriend/girlfriend/spouse gets sick...\n",
-       "6486  INFP  'So I made a blog... After 5 years of wanting ...\n",
-       "4738  INFP  'http://i.imgur.com/gwbS9U2.jpg|||http://image...\n",
-       "123   ISTP  'I don't offer advice unless it is asked for o...\n",
-       "7176  ISTP  'Well that was pretty clear. Nice try by mcGre..."
+       "7057  ISTJ  'At my work, passive-aggressive behavior is wh...\n",
+       "3261  INFJ  '@Macrosapien  I agree with the victim part. W...\n",
+       "760   ENTJ  '6w7  http://youtu.be/jSWIUEV5sPQ|||sx/sp 5w4 ...\n",
+       "7346  ISFP  'I'm still here when you mention me by name! L...\n",
+       "8522  ISFP  I've spent years trying to learn how to be pro..."
       ]
      },
      "execution_count": 4,

diff --git a/notebooks/README.md b/notebooks/README.md
@@ -0,0 +1 @@
+# Jupyter Notebooks
diff --git a/notebooks/__init__.py b/notebooks/__init__.py
diff --git a/trained/model.h5 b/trained/model.h5
diff --git a/trained/type_encoder.pkl b/trained/type_encoder.pkl
diff --git a/trained/vectorizer.pkl b/trained/vectorizer.pkl
diff --git a/trainers/README.md b/trainers/README.md
@@ -1,4 +1,4 @@
-# trainers
+# Trainers
 
 Model trainers.
 
diff --git a/trainers/myers_briggs_type.py b/trainers/myers_briggs_type.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pandas as pd
+from sklearn.utils import shuffle
+from sklearn.preprocessing import OneHotEncoder
+from nltk.stem import SnowballStemmer
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.model_selection import train_test_split
+from keras.models import Sequential
+from keras.layers import Dense, Flatten
+
+
+# loading the dataset
+N = 1500  # number of elements to use from the dataset, because of high ram usage  
+df = shuffle( pd.read_csv('../data/mbti-myers-briggs-personality-types.csv') )[:N]
+
+
+# preprocessing
+type_encoder = OneHotEncoder()
+y = type_encoder.fit_transform( np.array([df['type'].values]).T ).toarray()
+
+vectorizer = StemmedTfidfVectorizer(min_df=1, stop_words='english')
+X = vectorizer.fit_transform(df['posts'].values).toarray()
+
+
+# model selection
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
+
+
+# building the model
+model = Sequential()
+model.add(Dense(16, activation='relu'))
+model.add(Dense(16, activation='relu'))
+model.add(Dense(16, activation='softmax'))
+model.compile(loss='mean_squared_error', optimizer='adagrad')
+
+history = model.fit(x=X_train, y=y_train, verbose=1, epochs=22, shuffle=True)
+
+train_score = model.evaluate(X_train, y_train, verbose=0)
+print('Train score', train_score)
+test_score = model.evaluate(X_test, y_test, verbose=0)
+print('Test score', test_score)
+
+model.save('../trained/temp.h5')