forked from ArtificiAI/Sentiment-Analysis-AutoML
-
Notifications
You must be signed in to change notification settings - Fork 2
/
1_optimize.py
39 lines (30 loc) · 1.33 KB
/
1_optimize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
os.environ["JOBLIB_TEMP_FOLDER"] = "/tmp"
from sklearn.model_selection import train_test_split
from sentiment_analysis_auto_ml.data_loader import DataLoader
from sentiment_analysis_auto_ml.evaluation.cross_validation import get_fitted_best_classifier_from_cross_validation
from sentiment_analysis_auto_ml.model_checkpoint import ModelCheckpoint
from sentiment_analysis_auto_ml.pipeline_factory import NewLogisticPipelineFunctor, \
PIPELINE_DEFAULT_NAME, get_generic_hyperparams_grid
def main():
    """Select a classifier on a train split, report scores, refit on all data, save.

    Steps, in order:
      1. Load the dataset through ``DataLoader().load_data()``.
      2. Hold out 10% of the samples (shuffled) as a test split.
      3. Obtain a fitted classifier from
         ``get_fitted_best_classifier_from_cross_validation`` using the
         generic hyperparameter grid and a ``NewLogisticPipelineFunctor``.
      4. Print the classifier's ``score`` on the train and test splits.
      5. Build a fresh pipeline, copy the selected parameters onto it, fit it
         on the complete dataset, print its score, and save it through the
         ``ModelCheckpoint``.
    """
    checkpoint = ModelCheckpoint(PIPELINE_DEFAULT_NAME)
    data_loader = DataLoader()
    X, y = data_loader.load_data()

    # Keep 10% of the data aside so the selected model is also scored on
    # samples it was not fitted on. No random_state is passed, so the split
    # (and therefore the printed scores) differs from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)

    best = get_fitted_best_classifier_from_cross_validation(
        get_generic_hyperparams_grid(),
        NewLogisticPipelineFunctor(),
        X_train, y_train,
        name=PIPELINE_DEFAULT_NAME,
        verbose=True
    )

    print("Train score:", best.score(X_train, y_train))
    print("Test score:", best.score(X_test, y_test))

    # Re-fit on complete data: a new pipeline receives the parameters of the
    # selected one and is trained on every sample, held-out split included.
    # NOTE(review): get_params() presumably returns nested estimator objects
    # as well as plain hyperparameters, in which case the new pipeline shares
    # sub-estimators with the previous `best` - confirm this is intended.
    best = NewLogisticPipelineFunctor()().set_params(**best.get_params()).fit(X, y)

    # This score is computed on the same data the model was just fitted on.
    print("Full-data retrain score:", best.score(X, y))

    checkpoint.save_model(best)


if __name__ == "__main__":
    main()