# SVM_model.py
######################## SVM model ##########################
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_validate
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Load the CSV file into a pandas dataframe.
# The path is written as a raw string so that the Windows backslashes are taken
# literally; in a normal string literal "\R", "\P", "\E", ... are invalid escape
# sequences (SyntaxWarning on Python 3.12+, scheduled to become an error).
DATA_PATH = r"C:\Roy_PhD_Nott_work\PhD work\Research direction\Experiment 1\Submit_V1\Code_Experiment_1\Optimised_image_pre-processing_combined\Results_Batch 3\Combined_microalgae_Best_Optimised_3.csv"
df = pd.read_csv(DATA_PATH, header=0)

# Every column except 'Class' is used as a feature; 'Class' is the label.
X = df.drop('Class', axis=1).values
y = df['Class'].values

# Hold out 20% of the rows for testing. stratify=y keeps the class proportions
# the same in both splits and random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Convert y_train and y_test to Pandas Series so value_counts() can be used below
y_train = pd.Series(y_train)
y_test = pd.Series(y_test)

# Print number of training and testing images (one CSV row per sample)
print("Number of training images: ", len(X_train))
print("Number of testing images: ", len(X_test))

# Print number of images for each class in training and testing sets
print("Number of images for each class in training set:\n", y_train.value_counts())
print("Number of images for each class in testing set:\n", y_test.value_counts())
# Define the SVM model
svm = SVC()

# Define the hyperparameters to optimize using GridSearchCV.
# The grid is given per kernel so that only parameters the kernel actually uses
# are varied: 'degree' only affects the 'poly' kernel and 'gamma' is unused by
# the 'linear' kernel. A single flat grid would cross them with every kernel
# and refit many identical models (320 candidates instead of 130).
c_values = [10**i for i in range(-5, 5)]
gamma_values = ['auto', 'scale']
param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['poly'], 'C': c_values, 'degree': [2, 3, 4, 5], 'gamma': gamma_values},
    {'kernel': ['rbf', 'sigmoid'], 'C': c_values, 'gamma': gamma_values},
]

# 5-fold cross-validated search on all cores; refit=True retrains the best
# candidate on the whole training set so grid_search can predict afterwards.
grid_search = GridSearchCV(svm, param_grid, cv=5, n_jobs=-1, verbose=3, refit=True)

# fit() returns the fitted search object, so `history` is the same object as
# `grid_search`; the name is kept because later code reads history.cv_results_.
history = grid_search.fit(X_train, y_train)

# Get the index of the best mean test score (position in cv_results_)
best_idx = np.argmax(history.cv_results_['mean_test_score'])

# Print the best hyperparameters and the corresponding accuracy score
print("Best hyperparameters: ", grid_search.best_params_)
print("Accuracy score: ", grid_search.best_score_)
print("Iteration: ", best_idx + 1)
# Plot, for every candidate evaluated by the grid search, its mean
# cross-validated accuracy (left) and its mean fit / score times (right).
cv_results = history.cv_results_

# The x-axis is labelled "Iteration", so plot against the 1-based position of
# each candidate in cv_results_. Plotting against 'rank_test_score' (as before)
# orders points by rank instead, which makes the connected line zig-zag and
# puts tied candidates on top of each other.
iterations = np.arange(1, len(cv_results['mean_test_score']) + 1)

plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(iterations, cv_results['mean_test_score'], 'o-', label='Test accuracy')
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.legend(loc='best')

plt.subplot(1, 2, 2)
plt.plot(iterations, cv_results['mean_fit_time'], 'o-', label='Fit time')
plt.plot(iterations, cv_results['mean_score_time'], 'o-', label='Score time')
plt.xlabel('Iteration')
plt.ylabel('Time (s)')
plt.legend(loc='best')

plt.tight_layout()
plt.show()
def _predict_and_score(features, labels):
    """Predict *features* with the fitted grid search and score against *labels*.

    Returns the predicted labels, the accuracy score and the confusion matrix,
    in that order.
    """
    predictions = grid_search.predict(features)
    return (
        predictions,
        accuracy_score(labels, predictions),
        confusion_matrix(labels, predictions),
    )


# Evaluate the optimized model on the held-out testing set
y_pred, accuracy, conf_matrix_test = _predict_and_score(X_test, y_test)
print("Accuracy score on the testing set: ", accuracy)
print("Confusion matrix on the testing set: ")
print(conf_matrix_test)

# Evaluate the same model on the training set (useful for spotting overfitting)
y_pred_train, accuracy_train, conf_matrix_train = _predict_and_score(X_train, y_train)
print("Accuracy score on the training set: ", accuracy_train)
print("Confusion matrix on the training set: ")
print(conf_matrix_train)
# Plot the confusion matrix for the testing set as an annotated heat map
fig, ax = plt.subplots()
im = ax.imshow(conf_matrix_test, interpolation='nearest', cmap=plt.cm.Blues)
ax.figure.colorbar(im, ax=ax)

# Set tick labels.
# confusion_matrix() was called without `labels`, so its rows/columns are the
# sorted labels that occur in y_test or y_pred. Build the tick labels from the
# same two arrays so their count always matches the matrix, even if a class
# from the full dataset is missing from the test split and never predicted.
classes = np.unique(np.concatenate((np.asarray(y_test), np.asarray(y_pred))))
n_rows, n_cols = conf_matrix_test.shape
ax.set(
    xticks=np.arange(n_cols),
    yticks=np.arange(n_rows),
    xticklabels=classes,
    yticklabels=classes,
    ylabel='True label',
    xlabel='Predicted label',
)

# Rotate the tick labels and set their alignment
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Write the count in every cell; cells darker than half the maximum get white
# text so the number stays readable on the Blues colour map.
thresh = conf_matrix_test.max() / 2.
for i, j in np.ndindex(n_rows, n_cols):
    count = conf_matrix_test[i, j]
    ax.text(
        j, i, format(count, 'd'),
        ha="center", va="center",
        color="white" if count > thresh else "black",
    )

fig.tight_layout()
plt.show()
# Advanced evaluation metrics on the testing set
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

# Macro averaging: each metric is computed per class and the per-class values
# are averaged without weighting by class size.
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)
classification = classification_report(y_test, y_pred)

for caption, value in (
    ("Precision: ", precision),
    ("Recall: ", recall),
    ("F1 score: ", f1),
):
    print(caption, value)
print("Classification report: ")
print(classification)