Skip to content

Commit

Permalink
Changing file places
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinKondor committed Apr 1, 2019
1 parent 932e774 commit f501d08
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 22 deletions.
1 change: 1 addition & 0 deletions models/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Models
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -126,41 +126,41 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3395</th>\n",
" <td>INFJ</td>\n",
" <td>'If your boyfriend/girlfriend/spouse gets sick...</td>\n",
" <th>7057</th>\n",
" <td>ISTJ</td>\n",
" <td>'At my work, passive-aggressive behavior is wh...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6486</th>\n",
" <td>INFP</td>\n",
" <td>'So I made a blog... After 5 years of wanting ...</td>\n",
" <th>3261</th>\n",
" <td>INFJ</td>\n",
" <td>'@Macrosapien I agree with the victim part. W...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4738</th>\n",
" <td>INFP</td>\n",
" <td>'http://i.imgur.com/gwbS9U2.jpg|||http://image...</td>\n",
" <th>760</th>\n",
" <td>ENTJ</td>\n",
" <td>'6w7 http://youtu.be/jSWIUEV5sPQ|||sx/sp 5w4 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123</th>\n",
" <td>ISTP</td>\n",
" <td>'I don't offer advice unless it is asked for o...</td>\n",
" <th>7346</th>\n",
" <td>ISFP</td>\n",
" <td>'I'm still here when you mention me by name! L...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7176</th>\n",
" <td>ISTP</td>\n",
" <td>'Well that was pretty clear. Nice try by mcGre...</td>\n",
" <th>8522</th>\n",
" <td>ISFP</td>\n",
" <td>I've spent years trying to learn how to be pro...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" type posts\n",
"3395 INFJ 'If your boyfriend/girlfriend/spouse gets sick...\n",
"6486 INFP 'So I made a blog... After 5 years of wanting ...\n",
"4738 INFP 'http://i.imgur.com/gwbS9U2.jpg|||http://image...\n",
"123 ISTP 'I don't offer advice unless it is asked for o...\n",
"7176 ISTP 'Well that was pretty clear. Nice try by mcGre..."
"7057 ISTJ 'At my work, passive-aggressive behavior is wh...\n",
"3261 INFJ '@Macrosapien I agree with the victim part. W...\n",
"760 ENTJ '6w7 http://youtu.be/jSWIUEV5sPQ|||sx/sp 5w4 ...\n",
"7346 ISFP 'I'm still here when you mention me by name! L...\n",
"8522 ISFP I've spent years trying to learn how to be pro..."
]
},
"execution_count": 4,
Expand Down
1 change: 1 addition & 0 deletions notebooks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Jupyter Notebooks
Empty file added notebooks/__init__.py
Empty file.
Binary file added trained/model.h5
Binary file not shown.
Binary file added trained/type_encoder.pkl
Binary file not shown.
Binary file added trained/vectorizer.pkl
Binary file not shown.
2 changes: 1 addition & 1 deletion trainers/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# trainers
# Trainers

Model trainers.

43 changes: 43 additions & 0 deletions trainers/myers_briggs_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Flatten


# Stemming TF-IDF vectorizer.
# The script referenced `StemmedTfidfVectorizer` without defining or importing
# it (NameError at runtime). It is defined here from the already-imported
# SnowballStemmer and TfidfVectorizer.
_english_stemmer = SnowballStemmer('english')


class StemmedTfidfVectorizer(TfidfVectorizer):
    """TfidfVectorizer whose tokens are reduced to their English Snowball stem."""

    def build_analyzer(self):
        """Return the parent analyzer wrapped so every emitted token is stemmed.

        The parent analyzer still performs preprocessing, tokenization and
        stop-word removal; stemming is applied to its output.
        """
        analyzer = super().build_analyzer()

        def stemmed_analyzer(doc):
            return [_english_stemmer.stem(token) for token in analyzer(doc)]

        return stemmed_analyzer


# loading the dataset
N = 1500  # number of elements to use from the dataset, because of high ram usage
df = shuffle(pd.read_csv('../data/mbti-myers-briggs-personality-types.csv'))[:N]


# preprocessing
# One-hot encode the personality type; the encoder expects a 2-D column.
type_encoder = OneHotEncoder()
y = type_encoder.fit_transform(df['type'].values.reshape(-1, 1)).toarray()

# Stemmed TF-IDF features of the posts, densified for Keras.
vectorizer = StemmedTfidfVectorizer(min_df=1, stop_words='english')
X = vectorizer.fit_transform(df['posts'].values).toarray()


# model selection
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)


# building the model
# The output width follows the encoded labels instead of a hard-coded 16:
# with only N sampled rows a rare type may be absent, in which case the
# one-hot matrix has fewer columns and a fixed-width layer would not match.
n_classes = y.shape[1]

model = Sequential()
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))
# Softmax output with one-hot targets pairs with categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adagrad')

history = model.fit(x=X_train, y=y_train, verbose=1, epochs=22, shuffle=True)

# No metrics were compiled, so evaluate() returns the loss value.
train_score = model.evaluate(X_train, y_train, verbose=0)
print('Train score', train_score)
test_score = model.evaluate(X_test, y_test, verbose=0)
print('Test score', test_score)

model.save('../trained/temp.h5')

0 comments on commit f501d08

Please sign in to comment.