h2oclasseval: an experimental R package for evaluating classification models built with h2o AutoML.
# Install the development version from GitHub
# (original had mismatched quotes: "BillPetti/h2oclasseval')
library(devtools)
install_github("BillPetti/h2oclasseval")
We will set the outcome to classify as Class, renamed from Survived:
library(h2o)
library(h2oclasseval)
library(tidyverse)
library(titanic)

# Prepare the Titanic training data: drop identifier-like columns,
# convert the categorical columns to factors, rename the outcome to
# Class, and center/scale the remaining numeric predictors.
titanic <- titanic::titanic_train

titanic <- titanic %>%
  select(-c(PassengerId, Name, Ticket, Cabin)) %>%
  mutate(Survived = factor(Survived, levels = c(0, 1)),
         Pclass = factor(Pclass, levels = c(1, 2, 3)),
         Sex = as.factor(Sex),
         Embarked = as.factor(Embarked)) %>%
  rename(Class = Survived) %>%
  # across() supersedes mutate_if(); wrapping scale() in as.numeric()
  # avoids the one-column matrix columns scale() otherwise returns,
  # so the separate mutate_if(is.matrix, as.numeric) step is no longer needed
  mutate(across(where(is.numeric), ~ as.numeric(scale(.x))))

# 75/25 train/test split.
# NOTE(review): dplyr::setdiff() drops duplicate rows, so this split
# assumes the prepared rows are unique -- confirm, or split by row index.
train <- titanic %>%
  sample_frac(.75)

test <- dplyr::setdiff(titanic, train)
For speed, we'll exclude any Deep Learning algorithms:
# Restart h2o from a clean state: shut down any cluster that is actually
# up, then initialize a fresh one with 8 GB of memory.
# (The original condition shut down whenever h2o.clusterIsUp() returned
# without error -- even when it returned FALSE -- and used `F` instead of
# FALSE; T/F are reassignable and should not be used.)
cluster_up <- tryCatch(h2o.clusterIsUp(), error = function(e) FALSE)

if (isTRUE(cluster_up)) {
  h2o.shutdown(prompt = FALSE)
}

h2o.init(max_mem_size = "8g")
# Outcome column and predictor columns (everything else)
y <- "Class"
x <- setdiff(names(train), y)

# Move the prepared splits into the h2o cluster
h2o_train <- as.h2o(train)
h2o_test <- as.h2o(test)

# fit models -----
# include_algos is a whitelist, so Deep Learning is implicitly excluded;
# it is mutually exclusive with exclude_algos, hence the commented line.
# NOTE(review): leaderboard_frame = h2o_train ranks models on the
# training data -- consider h2o_test to avoid optimistic leaderboard
# metrics (the README output below was produced with h2o_train).
aml <- h2o.automl(x = x,
                  y = y,
                  training_frame = h2o_train,
                  leaderboard_frame = h2o_train,
                  max_models = 60,
                  include_algos = c("DRF", "GLM", "GBM"),
                  #exclude_algos = c("DeepLearning"),
                  max_runtime_secs = 3600)
# Keep the h2o leaderboard plus a plain data.frame copy for inspection
lb <- aml@leaderboard
lb_dataframe <- as.data.frame(aml@leaderboard)
Set path_slug to an existing directory, and set time_stamp if different from the function's default:
# Where to write the models on disk
path_slug <- "/Users/williampetti/Desktop/h2o_titanic/"

# File-name-safe timestamp, e.g. "2020_08_25_11_29_11".
# format() states the intent directly and yields the identical string the
# original gsub('-| |:', '_', Sys.time()) produced from the default
# "YYYY-MM-DD HH:MM:SS" print form.
time_stamp <- format(Sys.time(), "%Y_%m_%d_%H_%M_%S")

# Save each leaderboard model to disk along with an .rds payload that
# collects the model objects and their variable importance
h2o_payload <- save_h2o_models_varimport(leaderboard = aml@leaderboard,
                                         path_slug = path_slug,
                                         time_stamp = time_stamp)
You should now see the individual h2o models and an object that collects each model and its variable importance (if applicable):
> list.files(path_slug)
[1] "2020_08_25_11_29_11_h2o_train_model_metrics.rds"
[2] "DRF_1_AutoML_20200825_112853"
[3] "GBM_1_AutoML_20200825_112853"
[4] "GBM_2_AutoML_20200825_112853"
[5] "GBM_3_AutoML_20200825_112853"
[6] "GBM_4_AutoML_20200825_112853"
[7] "GBM_5_AutoML_20200825_112853"
[8] "GBM_grid__1_AutoML_20200825_112853_model_1"
[9] "GBM_grid__1_AutoML_20200825_112853_model_2"
[10] "GBM_grid__1_AutoML_20200825_112853_model_3"
[11] "GBM_grid__1_AutoML_20200825_112853_model_4"
[12] "GBM_grid__1_AutoML_20200825_112853_model_5"
[13] "GBM_grid__1_AutoML_20200825_112853_model_6"
[14] "GBM_grid__1_AutoML_20200825_112853_model_7"
[15] "GBM_grid__1_AutoML_20200825_112853_model_8"
[16] "GLM_1_AutoML_20200825_112853"
[17] "XRT_1_AutoML_20200825_112853"
Variable importance for each model can be accessed like so:
> h2o_payload$DRF_1_AutoML_20200825_112853$variable_importance
model_id variable relative_importance scaled_importance percentage
1 DRF_1_AutoML_20200825_112853 Sex 1109.5988 1.0000000 0.32190429
2 DRF_1_AutoML_20200825_112853 Age 732.1392 0.6598234 0.21239998
3 DRF_1_AutoML_20200825_112853 Fare 728.6841 0.6567095 0.21139762
4 DRF_1_AutoML_20200825_112853 Pclass 426.1224 0.3840329 0.12362184
5 DRF_1_AutoML_20200825_112853 SibSp 167.3491 0.1508195 0.04854945
6 DRF_1_AutoML_20200825_112853 Embarked 157.0859 0.1415700 0.04557200
7 DRF_1_AutoML_20200825_112853 Parch 126.0039 0.1135581 0.03655483
You can also generate a custom grid with a number of evaluation metrics for classification models. The grid will calculate overall logloss
as well as custom metrics for individual thresholds:
# Pull the fitted model objects out of the saved payload
model_list <- map(h2o_payload, ~ .x$model_object)

# Generate the custom confusion-matrix metrics at thresholds across 0-1
# for every model, stacking the results into one data frame
h2o_test_metrics <- map_df(
  .x = model_list,
  .f = ~ custom_h2o_confusion_metrics(model = .x,
                                      newdata = h2o_test,
                                      threshold = c(0, 1))
)

# Keep the ten best models with precision and recall at or above 75%,
# ordered by lowest logloss, and render them as a markdown table
h2o_test_metrics %>%
  filter(precision >= .75, recall >= .75) %>%
  arrange(logloss, desc(precision), desc(recall), desc(f1_score)) %>%
  slice(1:10) %>%
  bpettir::format_tables_md()
model | threshold | logloss | precision | recall | neg_precision | specificity | fall_out | f1_score | true_positives | false_negatives | false_positives | true_negatives |
---|---|---|---|---|---|---|---|---|---|---|---|---|
GBM_grid__1_AutoML_20200825_112853_model_3 | 0.42 | 0.4529051 | 0.7500000 | 0.7702703 | 0.8380952 | 0.8224299 | 0.1775701 | 0.7600000 | 57 | 17 | 19 | 88 |
GBM_grid__1_AutoML_20200825_112853_model_1 | 0.51 | 0.4539179 | 0.7777778 | 0.7567568 | 0.8348624 | 0.8504673 | 0.1495327 | 0.7671233 | 56 | 18 | 16 | 91 |
GBM_grid__1_AutoML_20200825_112853_model_1 | 0.52 | 0.4539179 | 0.7777778 | 0.7567568 | 0.8348624 | 0.8504673 | 0.1495327 | 0.7671233 | 56 | 18 | 16 | 91 |
GBM_grid__1_AutoML_20200825_112853_model_1 | 0.50 | 0.4539179 | 0.7671233 | 0.7567568 | 0.8333333 | 0.8411215 | 0.1588785 | 0.7619048 | 56 | 18 | 17 | 90 |
GBM_grid__1_AutoML_20200825_112853_model_1 | 0.49 | 0.4539179 | 0.7567568 | 0.7567568 | 0.8317757 | 0.8317757 | 0.1682243 | 0.7567568 | 56 | 18 | 18 | 89 |
GBM_grid__1_AutoML_20200825_112853_model_6 | 0.46 | 0.4540665 | 0.7532468 | 0.7837838 | 0.8461538 | 0.8224299 | 0.1775701 | 0.7682119 | 58 | 16 | 19 | 88 |
GBM_grid__1_AutoML_20200825_112853_model_6 | 0.47 | 0.4540665 | 0.7500000 | 0.7702703 | 0.8380952 | 0.8224299 | 0.1775701 | 0.7600000 | 57 | 17 | 19 | 88 |
GBM_2_AutoML_20200825_112853 | 0.55 | 0.4644109 | 0.8000000 | 0.7567568 | 0.8378378 | 0.8691589 | 0.1308411 | 0.7777778 | 56 | 18 | 14 | 93 |
GBM_2_AutoML_20200825_112853 | 0.56 | 0.4644109 | 0.8000000 | 0.7567568 | 0.8378378 | 0.8691589 | 0.1308411 | 0.7777778 | 56 | 18 | 14 | 93 |
GBM_2_AutoML_20200825_112853 | 0.57 | 0.4644109 | 0.8000000 | 0.7567568 | 0.8378378 | 0.8691589 | 0.1308411 | 0.7777778 | 56 | 18 | 14 | 93 |