-
-
Notifications
You must be signed in to change notification settings - Fork 49
/
demo_model_save_load.py
80 lines (60 loc) · 2.41 KB
/
demo_model_save_load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 14 20:24:30 2021
@author: yuezh
"""
import os
import sys
import scipy as sp
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from joblib import dump, load
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.utils.data import evaluate_print
from combo.models.score_comb import majority_vote, maximization, average
# suppress warnings
import warnings
warnings.filterwarnings("ignore")
# temporary solution for relative imports in case combo is not installed
# if combo is installed, no need to use the following line
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
from suod.models.base import SUOD
from suod.utils.utility import get_estimators_small
if __name__ == "__main__":
# load files
mat_file_list = [
'cardio.mat',
# 'satellite.mat',
# 'satimage-2.mat',
# 'mnist.mat',
]
mat_file = mat_file_list[0]
mat_file_name = mat_file.replace('.mat', '')
print("\n... Processing", mat_file_name, '...')
mat = sp.io.loadmat(os.path.join('', 'datasets', mat_file))
X = mat['X']
y = mat['y']
# standardize data to be digestible for most algorithms
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = \
train_test_split(X, y, test_size=0.4, random_state=42)
contamination = y.sum() / len(y)
base_estimators = get_estimators_small(contamination)
model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True,
contamination=contamination, approx_flag_global=True)
model.fit(X_train) # fit all models with X
model.approximate(X_train) # conduct model approximation if it is enabled
# save the model
dump(model, 'model.joblib')
# load the model
model = load('model.joblib')
predicted_labels = model.predict(X_test) # predict labels
predicted_scores = model.decision_function(X_test) # predict scores
predicted_probs = model.predict_proba(X_test) # predict scores
###########################################################################
# model evaluation with the loaded model
evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
evaluate_print('average', y_test, average(predicted_scores))
evaluate_print('maximization', y_test, maximization(predicted_scores))