-
Notifications
You must be signed in to change notification settings - Fork 15
/
solution_02_KNN.py
50 lines (35 loc) · 2.04 KB
/
solution_02_KNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
# Hold out a stratified test set: each class keeps the same proportion in
# the train and test splits, and the fixed seed makes the split reproducible.
X_penguin_train, X_penguin_test, y_penguin_train, y_penguin_test = train_test_split(
    X_penguin, y_penguin,
    random_state=4212280, stratify=y_penguin)

# KNN is distance based, so the features are standardised first. Keeping the
# scaler inside the pipeline means it is fitted together with the model each
# time the grid search fits a candidate.
knn_i = KNeighborsClassifier(n_jobs=-1)
pipeline_knn_i = Pipeline([('scalar', StandardScaler()),
                           ('model', knn_i)])

# Hyperparameters to explore, addressed as <pipeline step>__<parameter>:
# n_neighbors from 5 to 49, with both weighting schemes.
grid_values = {'model__n_neighbors': np.arange(5, 50, 1),
               'model__weights': ['uniform', 'distance']}

# Cross-validated grid search; the retained model is the one with the
# highest accuracy.
grid_knn_acc_Penguin = GridSearchCV(pipeline_knn_i,
                                    param_grid=grid_values,
                                    scoring='accuracy', n_jobs=-1)
grid_knn_acc_Penguin.fit(X_penguin_train, y_penguin_train)

# Accuracy of the refitted best model on the held-out test set.
testBestScore = grid_knn_acc_Penguin.score(X_penguin_test, y_penguin_test)

print('Grid best parameter (max. accuracy):\n\t', grid_knn_acc_Penguin.best_params_)
print('Grid best score (accuracy): {:.3f}'.format(grid_knn_acc_Penguin.best_score_))
print('Grid best parameter (max. accuracy) model on test: {:.3f}'.format(testBestScore))

# Predict the labels of the test set with the best model.
y_pred_test = grid_knn_acc_Penguin.predict(X_penguin_test)

bestNN = grid_knn_acc_Penguin.best_params_['model__n_neighbors']
bestWeight = grid_knn_acc_Penguin.best_params_['model__weights']
plotTitle = 'KNN n_neighbors: {}, weights: {}\n Accuracy: {:.3f}'.format(
    bestNN,
    bestWeight,
    accuracy_score(y_penguin_test, y_pred_test))

# Pin the row/column order of the confusion matrix to the classes the model
# was fitted on, and reuse that same order for the tick labels so the axes
# show the class names instead of bare indices.
penguin_classes = grid_knn_acc_Penguin.classes_
confusion_m_penguin = confusion_matrix(y_penguin_test, y_pred_test,
                                       labels=penguin_classes)

plt.figure(figsize=(5.5, 4))
# fmt='d' prints the counts as plain integers; the seaborn default ('.2g')
# switches to scientific notation for counts of 100 or more.
sns.heatmap(confusion_m_penguin, annot=True, fmt='d',
            xticklabels=penguin_classes, yticklabels=penguin_classes)
plt.title(plotTitle)
plt.ylabel('True label')
plt.xlabel('Predicted label')