diff --git a/DESCRIPTION b/DESCRIPTION
index f66547be..c31f451e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: stacks
 Title: Tidy Model Stacking
-Version: 0.2.0.9000
+Version: 0.2.1
 Authors@R: c(
     person(given = "Simon",
            family = "Couch",
diff --git a/NEWS.md b/NEWS.md
index 1d739fd7..20837ead 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,29 +1,20 @@
-# stacks
-
-## v0.2.0.9000
-
-To be released as 0.2.1
-
-* Various bug fixes and improvements to documentation.
-
-### Bug fixes
+# v0.2.1
 
 * Updates for importing workflow sets that use the `add_variables()`
   preprocessor.
-
 * Plot fixes for cases where coefficients are negative.
-
 * Performance and member plots now show the effect of multiple mixture values.
+* Package diagrams now have alt text.
 
-## v0.2.0
+# v0.2.0
 
-### Breaking changes
+## Breaking changes
 
 This release of the package changes some elements of the internal structure
 of model stacks. As such, model stacks stored as saved objects will need to
 be regenerated before predicting, plotting, printing, etc.
 
-### New features
+## New features
 
 * The package now supports elastic net models as a meta-learner via
   the `mixture` argument to `blend_predictions`.
@@ -34,7 +25,7 @@ be regenerated before predicting, plotting, printing, etc.
 * Objects tuned with racing methods from the {finetune} package can now be
   added as candidate members.
 
-### Bug fixes
+## Bug fixes
 
 * Fixed bug in determining member hyperparameters during member
   fitting when using non-RMSE/ROC AUC metrics.
@@ -43,7 +34,7 @@ be regenerated before predicting, plotting, printing, etc.
   are not valid column names and use `make.names` for associated
   candidate members.
 
-### Miscellaneous improvements
+## Miscellaneous improvements
 
 * Drop {digest} dependency in favor of {tune}/{rsample} "fingerprinting"
   to check consistency of resamples.
@@ -55,6 +46,6 @@ be regenerated before predicting, plotting, printing, etc.
   more informative.
 * Various improvements to documentation.
 
-### v0.1.0
+# v0.1.0
 
 Initial release!
diff --git a/R/add_candidates.R b/R/add_candidates.R
index df63c9f6..e7958987 100644
--- a/R/add_candidates.R
+++ b/R/add_candidates.R
@@ -184,7 +184,7 @@ add_candidates.default <- function(data_stack, candidates, name, ...)
 {
 .set_mode_ <- function(stack, candidates, name) {
   wf_spec <-
     attr(candidates, "workflow") %>%
-    workflows::pull_workflow_spec()
+    workflows::extract_spec_parsnip()
 
   new_mode <- wf_spec$mode
   old_mode <- attr(stack, "mode")
@@ -363,7 +363,7 @@ update_stack_data <- function(stack, new_data) {
 stack_workflow <- function(x) {
   res <-
     workflows::workflow() %>%
-    workflows::add_model(workflows::pull_workflow_spec(x))
+    workflows::add_model(workflows::extract_spec_parsnip(x))
 
   pre <- workflows::pull_workflow_preprocessor(x)
diff --git a/R/print.R b/R/print.R
index 93082162..aad3d913 100644
--- a/R/print.R
+++ b/R/print.R
@@ -84,7 +84,7 @@ top_coefs <- function(x, penalty = x$penalty$penalty, n = 10) {
   sub_models <- purrr::map_dfr(x$cols_map, ~ tibble::tibble(terms = .x), .id = "model_name")
 
   model_types <-
-    purrr::map(x$model_defs, workflows::pull_workflow_spec) %>%
+    purrr::map(x$model_defs, workflows::extract_spec_parsnip) %>%
     purrr::map_dfr(~ tibble::tibble(model_type = class(.x)[1]), .id = "model_name")
 
   res <-
     dplyr::left_join(betas, sub_models, by = "terms") %>%
diff --git a/README.Rmd b/README.Rmd
index fefbe08f..0b7108c6 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -43,7 +43,7 @@ stacks is generalized with respect to:
 * Cross-validation scheme: Any resampling algorithm implemented in [rsample](https://rsample.tidymodels.org/) or adjacent packages is fair game for resampling data for use in training a model stack.
 * Error metric: Any metric function implemented in [yardstick](https://yardstick.tidymodels.org/) or adjacent packages is fair game for evaluating model stacks and their members. That package provides some infrastructure for creating your own metric functions as well!
 
-stacks uses a regularized linear model to combine predictions from ensemble members, though this model type is only one of many possible learning algorithms that could be used to fit a stacked ensemble model. For implementations of additional ensemble learning algorithms, check out [h2o](http://docs.h2o.ai/h2o/latest-stable/h2o-r/docs/reference/h2o.stackedEnsemble.html) and [SuperLearner](https://CRAN.R-project.org/package=SuperLearner).
+stacks uses a regularized linear model to combine predictions from ensemble members, though this model type is only one of many possible learning algorithms that could be used to fit a stacked ensemble model. For implementations of additional ensemble learning algorithms, check out [h2o](https://docs.h2o.ai/h2o/latest-stable/h2o-r/docs/reference/h2o.stackedEnsemble.html) and [SuperLearner](https://CRAN.R-project.org/package=SuperLearner).
 
 Rather than diving right into the implementation, we'll focus here on how the pieces fit together, conceptually, in building an ensemble with `stacks`. See the `basics` vignette for an example of the API in action!
diff --git a/README.md b/README.md
index a0e467c3..ee41d509 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ ensemble members, though this model type is only one of many possible
 learning algorithms that could be used to fit a stacked ensemble model.
 For implementations of additional ensemble learning algorithms, check
 out
-[h2o](http://docs.h2o.ai/h2o/latest-stable/h2o-r/docs/reference/h2o.stackedEnsemble.html)
+[h2o](https://docs.h2o.ai/h2o/latest-stable/h2o-r/docs/reference/h2o.stackedEnsemble.html)
 and [SuperLearner](https://CRAN.R-project.org/package=SuperLearner).
 Rather than diving right into the implementation, we’ll focus here on
diff --git a/cran-comments.md b/cran-comments.md
index a8e1b223..1ad9e821 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,4 +1,4 @@
-# stacks 0.2.0
+# stacks 0.2.1
 
 ## Test environments
diff --git a/docs/404.html b/docs/404.html
index 703aa1fc..8629860e 100644
[regenerated pkgdown HTML ("Page not found (404) • stacks"); markup-only changes omitted]
diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html
index c7fe2bb4..79b4f92f 100644
[regenerated pkgdown HTML; markup-only changes omitted]
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html
index a620fa3d..ef512005 100644
[regenerated pkgdown HTML; markup-only changes omitted]
diff --git a/docs/articles/basics.html b/docs/articles/basics.html
[regenerated basics vignette; readable portions follow]
Taking a quick look at the data, it seems like the hatch time is pretty closely related to some of our predictors!

library(ggplot2)

ggplot(tree_frogs) +
  aes(x = age, y = latency, color = treatment) +
  geom_point() +
  labs(x = "Embryo Age (s)", y = "Time to Hatch (s)", col = "Treatment")

Let’s give this a go!

Define candidate ensemble members

At the highest level, ensembles are formed from model definitions. In this package, model definitions are an instance of a minimal workflow, containing a model specification (as defined in the parsnip package) and, optionally, a preprocessor (as defined in the recipes package). Model definitions specify the form of candidate ensemble members.

A diagram representing 'model definitions,' which specify the form of candidate ensemble members. Three colored boxes represent three different model types; a K-nearest neighbors model (in salmon), a linear regression model (in yellow), and a support vector machine model (in green).

Defining the constituent model definitions is undoubtedly the longest part of building an ensemble with stacks. If you’re familiar with tidymodels “proper,” you’re probably fine to skip this section, keeping a few things in mind:

We’ll first start out with splitting up the training data, generating resamples, and setting some options that will be used by each model definition.

# some setup: resampling and a basic recipe
set.seed(1)
tree_frogs_split <- initial_split(tree_frogs)
tree_frogs_train <- training(tree_frogs_split)
tree_frogs_test  <- testing(tree_frogs_split)

set.seed(1)
folds <- rsample::vfold_cv(tree_frogs_train, v = 5)

tree_frogs_rec <- 
  recipe(latency ~ ., data = tree_frogs_train)

metric <- metric_set(rmse)

Tuning and fitting results for use in ensembles need to be fitted with the control arguments save_pred = TRUE and save_workflow = TRUE—these settings ensure that the assessment set predictions, as well as the workflow used to fit the resamples, are stored in the resulting object. For convenience, stacks supplies some control_stack_*() functions to generate the appropriate objects for you.
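Under the hood, these helpers just set the two control arguments named above. A minimal sketch of the rough equivalent, using tune’s own constructor (ctrl_by_hand is an illustrative name):

# approximately what control_stack_grid() sets up for you:
ctrl_by_hand <- 
  tune::control_grid(
    save_pred = TRUE,     # retain assessment set predictions
    save_workflow = TRUE  # retain the workflow used in fitting
  )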

In this example, we’ll be working with tune_grid() and fit_resamples() from the tune package, so we will use the following control settings:

ctrl_grid <- control_stack_grid()
ctrl_res <- control_stack_resamples()

We’ll define three different model definitions to try to predict time to hatch—a K-nearest neighbors model (with hyperparameters to tune), a linear model, and a support vector machine model (again, with hyperparameters to tune).

Starting out with K-nearest neighbors, we begin by creating a parsnip model specification:

# create a model definition
knn_spec <-
  nearest_neighbor(
    mode = "regression", 
    neighbors = tune("k")
  ) %>%
  set_engine("kknn")

knn_spec
#> K-Nearest Neighbor Model Specification (regression)
#> 
#>   neighbors = tune("k")
#> 
#> Computational engine: kknn

Note that, since we are tuning over several possible numbers of neighbors, this model specification defines multiple model configurations. The specific form of those configurations will be determined when specifying the grid search in tune_grid().

From here, we extend the basic recipe defined earlier to fully specify the form of the design matrix for use in a K-nearest neighbors model:

# extend the recipe
knn_rec <-
  tree_frogs_rec %>%
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors(), skip = TRUE) %>%
  step_meanimpute(all_numeric(), skip = TRUE) %>%
  step_normalize(all_numeric(), skip = TRUE)
#> Warning: `step_meanimpute()` was deprecated in recipes 0.1.16.
#> Please use `step_impute_mean()` instead.

knn_rec
#> Data Recipe
@@ -229,9 +227,9 @@ 

# add both to a workflow
knn_wflow <- 
  workflow() %>% 
  add_model(knn_spec) %>%
  add_recipe(knn_rec)

knn_wflow
#> ══ Workflow ════════════════════════════════════════════════════════════════════
@@ -241,10 +239,10 @@ 

#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 4 Recipe Steps
#> 
#> • step_dummy()
#> • step_zv()
#> • step_impute_mean()
#> • step_normalize()
#> 
#> ── Model ───────────────────────────────────────────────────────────────────────
#> K-Nearest Neighbor Model Specification (regression)
@@ -256,9 +254,9 @@

Finally, we can make use of the workflow, training set resamples, metric set, and control object to tune our hyperparameters. Using the grid argument, we specify that we would like to optimize over four possible values of k using a grid search.

# tune k and fit to the 5-fold cv
set.seed(2020)
knn_res <- 
  tune_grid(
    knn_wflow,
    resamples = folds,
    metrics = metric,
@@ -270,37 +268,37 @@ 

#> # Tuning results
#> # 5-fold cross-validation
#> # A tibble: 5 x 5
#>   splits           id    .metrics         .notes           .predictions      
#>   <list>           <chr> <list>           <list>           <list>            
#> 1 <split [343/86]> Fold1 <tibble [4 × 5]> <tibble [0 × 1]> <tibble [344 × 5]>
#> 2 <split [343/86]> Fold2 <tibble [4 × 5]> <tibble [0 × 1]> <tibble [344 × 5]>
#> 3 <split [343/86]> Fold3 <tibble [4 × 5]> <tibble [0 × 1]> <tibble [344 × 5]>
#> 4 <split [343/86]> Fold4 <tibble [4 × 5]> <tibble [0 × 1]> <tibble [344 × 5]>
#> 5 <split [344/85]> Fold5 <tibble [4 × 5]> <tibble [0 × 1]> <tibble [340 × 5]>

This knn_res object fully specifies the candidate members, and is ready to be included in a stacks workflow.
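As an aside, knn_res is an ordinary tuning results object, so the usual tune helpers still apply if you’d like to inspect the candidates before stacking—a quick sketch:

# resampled performance for each candidate value of k
knn_res %>% 
  collect_metrics()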

Now, specifying the linear model, note that we are not optimizing over any hyperparameters. Thus, we use the fit_resamples() function rather than tune_grid() or tune_bayes() when fitting to our resamples.

# create a model definition
lin_reg_spec <-
  linear_reg() %>%
  set_engine("lm")

# extend the recipe
lin_reg_rec <-
  tree_frogs_rec %>%
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors(), skip = TRUE)

# add both to a workflow
lin_reg_wflow <- 
  workflow() %>%
  add_model(lin_reg_spec) %>%
  add_recipe(lin_reg_rec)

# fit to the 5-fold cv
set.seed(2020)
lin_reg_res <- 
  fit_resamples(
    lin_reg_wflow,
    resamples = folds,
    metrics = metric,
@@ -311,43 +309,45 @@ 

#> # Resampling results
#> # 5-fold cross-validation
#> # A tibble: 5 x 5
#>   splits           id    .metrics         .notes           .predictions     
#>   <list>           <chr> <list>           <list>           <list>           
#> 1 <split [343/86]> Fold1 <tibble [1 × 4]> <tibble [0 × 1]> <tibble [86 × 4]>
#> 2 <split [343/86]> Fold2 <tibble [1 × 4]> <tibble [0 × 1]> <tibble [86 × 4]>
#> 3 <split [343/86]> Fold3 <tibble [1 × 4]> <tibble [0 × 1]> <tibble [86 × 4]>
#> 4 <split [343/86]> Fold4 <tibble [1 × 4]> <tibble [0 × 1]> <tibble [86 × 4]>
#> 5 <split [344/85]> Fold5 <tibble [1 × 4]> <tibble [0 × 1]> <tibble [85 × 4]>

Finally, putting together the model definition for the support vector machine:

# create a model definition
svm_spec <- 
  svm_rbf(
    cost = tune("cost"), 
    rbf_sigma = tune("sigma")
  ) %>%
  set_engine("kernlab") %>%
  set_mode("regression")

# extend the recipe
svm_rec <-
  tree_frogs_rec %>%
  step_dummy(all_nominal()) %>%
  step_zv(all_predictors(), skip = TRUE) %>%
  step_meanimpute(all_numeric(), skip = TRUE) %>%
  step_corr(all_predictors(), skip = TRUE) %>%
  step_normalize(all_numeric(), skip = TRUE)
#> Warning: `step_meanimpute()` was deprecated in recipes 0.1.16.
#> Please use `step_impute_mean()` instead.

# add both to a workflow
svm_wflow <- 
  workflow() %>% 
  add_model(svm_spec) %>%
  add_recipe(svm_rec)

# tune cost and sigma and fit to the 5-fold cv
set.seed(2020)
svm_res <- 
  tune_grid(
    svm_wflow, 
    resamples = folds, 
    grid = 6,
@@ -359,33 +359,33 @@ 

#> # Tuning results
#> # 5-fold cross-validation
#> # A tibble: 5 x 5
#>   splits           id    .metrics         .notes           .predictions      
#>   <list>           <chr> <list>           <list>           <list>            
#> 1 <split [343/86]> Fold1 <tibble [6 × 6]> <tibble [0 × 1]> <tibble [516 × 6]>
#> 2 <split [343/86]> Fold2 <tibble [6 × 6]> <tibble [0 × 1]> <tibble [516 × 6]>
#> 3 <split [343/86]> Fold3 <tibble [6 × 6]> <tibble [0 × 1]> <tibble [516 × 6]>
#> 4 <split [343/86]> Fold4 <tibble [6 × 6]> <tibble [0 × 1]> <tibble [516 × 6]>
#> 5 <split [344/85]> Fold5 <tibble [6 × 6]> <tibble [0 × 1]> <tibble [510 × 6]>

Altogether, we’ve created three model definitions, where the K-nearest neighbors model definition specifies 4 model configurations, the linear regression specifies 1, and the support vector machine specifies 6.

A diagram representing 'candidate members' generated from each model definition. Four salmon-colored boxes labeled 'KNN' represent K-nearest neighbors models trained on the resamples with differing hyperparameters. Similarly, the linear regression (LM) model generates one candidate member, and the support vector machine (SVM) model generates six.

With these three model definitions fully specified, we are ready to begin stacking these model configurations. (Note that, in most applied settings, one would likely specify many more than 11 candidate members.)

Putting together a stack

The first step to building an ensemble with stacks is to create a data_stack object—in this package, data stacks are tibbles (with some extra attributes) that contain the assessment set predictions for each candidate ensemble member.

A diagram representing a 'data stack,' a specific kind of data frame. Colored 'columns' depict, in white, the true value of the outcome variable in the validation set, followed by four columns (in salmon) representing the predictions from the K-nearest neighbors model, one column (in tan) representing the linear regression model, and six (in green) representing the support vector machine model.

We can initialize a data stack using the stacks() function.

 stacks()
 #> # A data stack with 0 model definitions and 0 candidate members.
The stacks() function works sort of like the ggplot() constructor from ggplot2—the function creates a basic structure that the object will be built on top of—except you’ll pipe the outputs rather than adding them with +.

The add_candidates() function adds ensemble members to the stack.

tree_frogs_data_st <- 
  stacks() %>%
  add_candidates(knn_res) %>%
  add_candidates(lin_reg_res) %>%
  add_candidates(svm_res)
 
 tree_frogs_data_st
@@ -396,7 +396,7 @@ 

#> # Outcome: latency (numeric)

As mentioned before, under the hood, a data_stack object is really just a tibble with some extra attributes. Checking out the actual data:

as_tibble(tree_frogs_data_st)
 #> # A tibble: 429 x 12
 #>    latency knn_res_1_1 knn_res_1_2 knn_res_1_3 knn_res_1_4 lin_reg_res_1_1
 #>      <dbl>       <dbl>       <dbl>       <dbl>       <dbl>           <dbl>
@@ -410,90 +410,89 @@ 

#>  8      46      -0.523      -0.549      -0.581      -0.587            37.1
#>  9     137      -0.287      -0.352      -0.447      -0.456            78.8
#> 10      73      -0.523      -0.549      -0.581      -0.587            38.8
#> # … with 419 more rows, and 6 more variables: svm_res_1_1 <dbl>,
#> #   svm_res_1_4 <dbl>, svm_res_1_3 <dbl>, svm_res_1_5 <dbl>, svm_res_1_2 <dbl>,
#> #   svm_res_1_6 <dbl>

The first column gives the true value of the outcome variable in the assessment set, and the remaining columns give the assessment set predictions for each candidate ensemble member. Since we’re in the regression case, there’s only one column per candidate ensemble member. In classification settings, there are as many columns per candidate ensemble member as there are levels of the outcome variable.
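(As a quick sanity check on that structure—11 candidate prediction columns plus the outcome column should give the 12 columns in the tibble printed above:)

# one outcome column plus one column per candidate (4 KNN + 1 LM + 6 SVM)
ncol(as_tibble(tree_frogs_data_st))
#> [1] 12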

That’s it! We’re now ready to determine how best to combine the predictions from each candidate ensemble member.

Fit the stack

The outputs from each of these candidate ensemble members are highly correlated, so the blend_predictions() function performs regularization to figure out how we can combine the outputs from the stack members to come up with a final prediction.

tree_frogs_model_st <-
  tree_frogs_data_st %>%
  blend_predictions()

The blend_predictions function determines how member model output will ultimately be combined in the final prediction by fitting a LASSO model on the data stack, predicting the true assessment set outcome using the predictions from each of the candidate members. Candidates with nonzero stacking coefficients become members.
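For more control over this step, blend_predictions() also accepts a vector of candidate penalty values (and, as of stacks 0.2.0, mixture values) to tune over—a sketch, where the particular grid values are purely illustrative:

tree_frogs_data_st %>%
  blend_predictions(
    penalty = 10^seq(-2, -0.5, length.out = 20),  # illustrative lasso penalties
    mixture = c(0.5, 1)                           # elastic net mixing proportions
  )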

A diagram representing 'stacking coefficients,' the coefficients of the linear model combining each of the candidate member predictions to generate the ensemble's ultimate prediction. Boxes for each of the candidate members are placed beside each other, filled in with color if the coefficient for the associated candidate member is nonzero.

To make sure that we have the right trade-off between minimizing the number of members and optimizing performance, we can use the autoplot() method:

theme_set(theme_bw())
autoplot(tree_frogs_model_st)

To show the relationship more directly:

autoplot(tree_frogs_model_st, type = "members")

If these results were not good enough, blend_predictions() could be called again with different values of penalty. As it is, blend_predictions() picks the penalty parameter with the numerically optimal results. To see the top results:

autoplot(tree_frogs_model_st, type = "weights")

Now that we know how to combine our model output, we can fit the candidates with non-zero stacking coefficients on the full training set.

tree_frogs_model_st <-
  tree_frogs_model_st %>%
  fit_members()

A diagram representing the ensemble members, where each member is a pentagon labeled and colored in according to the candidate it arose from.

Model stacks can be thought of as a group of fitted member models and a set of instructions on how to combine their predictions.

A diagram representing the 'model stack' class, which collates the stacking coefficients and members (candidate members with nonzero stacking coefficients that are trained on the full training set). The representation of the stacking coefficients and members is as before. Model stacks are a list subclass.

To identify which model configurations were assigned what stacking coefficients, we can make use of the collect_parameters() function:

collect_parameters(tree_frogs_model_st, "svm_res")
#> # A tibble: 6 x 4
#>   member         cost    sigma  coef
#>   <chr>         <dbl>    <dbl> <dbl>
#> 1 svm_res_1_1 0.00143 6.64e- 9   0  
#> 2 svm_res_1_2 3.59    3.95e- 4  27.1
#> 3 svm_res_1_3 0.0978  1.81e- 2   0  
#> 4 svm_res_1_4 0.00849 2.16e-10   0  
#> 5 svm_res_1_5 0.256   4.54e- 1   0  
#> 6 svm_res_1_6 7.64    3.16e- 7 352. 

This object is now ready to predict with new data!

tree_frogs_test <- 
  tree_frogs_test %>%
  bind_cols(predict(tree_frogs_model_st, .))

Juxtaposing the predictions with the true data:

ggplot(tree_frogs_test) +
  aes(x = latency, 
      y = .pred) +
  geom_point() + 
  coord_obs_pred()

Looks like our predictions were pretty strong! How do the stacks predictions perform, though, as compared to the members’ predictions? We can use the members = TRUE argument to predict() to generate predictions from each of the ensemble members.

member_preds <- 
  tree_frogs_test %>%
  select(latency) %>%
  bind_cols(predict(tree_frogs_model_st, tree_frogs_test, members = TRUE))

Now, evaluating the root mean squared error from each model:

map_dfr(member_preds, rmse, truth = latency, data = member_preds) %>%
  mutate(member = colnames(member_preds))
#> # A tibble: 6 x 4
#>   .metric .estimator .estimate member         
#>   <chr>   <chr>          <dbl> <chr>          
#> 1 rmse    standard         0   latency        
#> 2 rmse    standard        55.3 .pred          
#> 3 rmse    standard       114.  knn_res_1_4    
#> 4 rmse    standard        55.5 lin_reg_res_1_1
#> 5 rmse    standard       114.  svm_res_1_2    
#> 6 rmse    standard       114.  svm_res_1_6    

As we can see, the stacked ensemble outperforms each of the member models, though it is closely followed by one of its members.

Voila! You’ve now made use of the stacks package to predict red-eyed tree frog embryo hatching using a stacked ensemble! The full visual outline for these steps can be found here.

Developed by Simon Couch, Max Kuhn. Site built by pkgdown.
diff --git a/docs/articles/basics_files/figure-html/members-plot-1.png b/docs/articles/basics_files/figure-html/members-plot-1.png
index b2aabd36..cc6c111c 100644
Binary files a/docs/articles/basics_files/figure-html/members-plot-1.png and b/docs/articles/basics_files/figure-html/members-plot-1.png differ
diff --git a/docs/articles/basics_files/figure-html/penalty-plot-1.png b/docs/articles/basics_files/figure-html/penalty-plot-1.png
index c087b798..41650ee9 100644
Binary files a/docs/articles/basics_files/figure-html/penalty-plot-1.png and b/docs/articles/basics_files/figure-html/penalty-plot-1.png differ
diff --git a/docs/articles/basics_files/figure-html/unnamed-chunk-25-1.png b/docs/articles/basics_files/figure-html/unnamed-chunk-25-1.png
index a3315f35..11eaeab9 100644
Binary files a/docs/articles/basics_files/figure-html/unnamed-chunk-25-1.png and b/docs/articles/basics_files/figure-html/unnamed-chunk-25-1.png differ
diff --git a/docs/articles/basics_files/figure-html/weight-plot-1.png b/docs/articles/basics_files/figure-html/weight-plot-1.png
index 2150fe97..535f96e1 100644
Binary files a/docs/articles/basics_files/figure-html/weight-plot-1.png and b/docs/articles/basics_files/figure-html/weight-plot-1.png differ
diff --git a/docs/articles/basics_files/header-attrs-2.8/header-attrs.js b/docs/articles/basics_files/header-attrs-2.8/header-attrs.js
new file mode 100644
index 00000000..dd57d92e
--- /dev/null
+++ b/docs/articles/basics_files/header-attrs-2.8/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+  var i, h, a;
+  for (i = 0; i < hs.length; i++) {
+    h = hs[i];
+    if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+    a = h.attributes;
+    while (a.length > 0) h.removeAttribute(a[0].name);
+  }
+});
diff --git a/docs/articles/classification_files/figure-html/unnamed-chunk-3-1.png b/docs/articles/classification_files/figure-html/unnamed-chunk-3-1.png
index 21c6b747..a09b48a9 100644
Binary files a/docs/articles/classification_files/figure-html/unnamed-chunk-3-1.png and b/docs/articles/classification_files/figure-html/unnamed-chunk-3-1.png differ
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 2ac22925..60dc5f51 100644
[regenerated pkgdown HTML; markup-only changes omitted]
diff --git a/docs/index.html b/docs/index.html
index bbe15203..9a295618 100644
[regenerated pkgdown HTML; readable portions follow]

stacks is generalized with respect to:

stacks uses a regularized linear model to combine predictions from ensemble members, though this model type is only one of many possible learning algorithms that could be used to fit a stacked ensemble model. For implementations of additional ensemble learning algorithms, check out h2o and SuperLearner.

Rather than diving right into the implementation, we’ll focus here on how the pieces fit together, conceptually, in building an ensemble with stacks. See the basics vignette for an example of the API in action!

a grammar

At the highest level, ensembles are formed from model definitions. In this package, model definitions are an instance of a minimal workflow, containing a model specification (as defined in the parsnip package) and, optionally, a preprocessor (as defined in the recipes package). Model definitions specify the form of candidate ensemble members.

A diagram representing “model definitions,” which specify the form of candidate ensemble members. Three colored boxes represent three different model types; a K-nearest neighbors model (in salmon), a linear regression model (in yellow), and a support vector machine model (in green).

To be used in the same ensemble, each of these model definitions must share the same resample. This rsample rset object, when paired with the model definitions, can be used to generate the tuning/fitting results objects for the candidate ensemble members with tune.

A diagram representing “candidate members” generated from each model definition. Four salmon-colored boxes labeled “KNN” represent K-nearest neighbors models trained on the resamples with differing hyperparameters. Similarly, the linear regression model generates one candidate member, and the support vector machine model generates six.

Candidate members first come together in a data_stack object through the add_candidates() function. Principally, these objects are just tibbles, where the first column gives the true outcome in the assessment set (the portion of the training set used for model validation), and the remaining columns give the predictions from each candidate ensemble member. (When the outcome is numeric, there’s only one column per candidate ensemble member. Classification requires as many columns per candidate as there are levels in the outcome variable.) They also bring along a few extra attributes to keep track of model definitions.

A diagram representing a “data stack,” a specific kind of data frame. Colored “columns” depict, in white, the true value of the outcome variable in the validation set, followed by four columns (in salmon) representing the predictions from the K-nearest neighbors model, one column (in tan) representing the linear regression model, and six (in green) representing the support vector machine model.
Then, the data stack can be evaluated using blend_predictions() to determine how best to combine the outputs from each of the candidate members. In the stacking literature, this process is commonly called metalearning.

The outputs of each member are likely highly correlated. Thus, depending on the degree of regularization you choose, the coefficients for the inputs of (possibly) many of the members will zero out—their predictions will have no influence on the final output, and those terms will thus be thrown out.
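How aggressively candidates are zeroed out is governed by the lasso penalty, and negative coefficients can be permitted with non_negative = FALSE. A minimal sketch, with my_data_stack standing in for a real data stack:

my_data_stack %>%
  blend_predictions(
    penalty = c(0.1, 1),   # larger penalties zero out more candidates
    non_negative = FALSE   # allow negative stacking coefficients
  )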

A diagram representing “stacking coefficients,” the coefficients of the linear model combining each of the candidate member predictions to generate the ensemble’s ultimate prediction. Boxes for each of the candidate members are placed beside each other, filled in with color if the coefficient for the associated candidate member is nonzero.
These stacking coefficients determine which candidate ensemble members will become ensemble members. Candidates with non-zero stacking coefficients are then fitted on the whole training set, altogether making up a model_stack object.

A diagram representing the “model stack” class, which collates the stacking coefficients and members (candidate members with nonzero stacking coefficients that are trained on the full training set). The representation of the stacking coefficients is as before, where the members (shown next to their associated stacking coefficients) are colored-in pentagons. Model stacks are a list subclass.

This model stack object, outputted from fit_members(), is ready to predict on new data! The trained ensemble members are often referred to as base models in the stacking literature.
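Strung together, the grammar reads as a single pipe—a minimal sketch in which the tuning results res_1 and res_2 and the data frame new_data are placeholders:

model_st <-
  stacks() %>%               # initialize the data stack
  add_candidates(res_1) %>%  # add candidate members
  add_candidates(res_2) %>%
  blend_predictions() %>%    # determine stacking coefficients
  fit_members()              # fit candidates with nonzero coefficients

predict(model_st, new_data)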

The full visual outline for these steps can be found here. The API for the package closely mirrors these ideas. See the basics vignette for an example of how this grammar is implemented!

contributing

This project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.

In the stacks package, some test objects take too long to build with every commit. If your contribution changes the structure of data_stack or model_stack objects, please regenerate these test objects by running the scripts in man-roxygen/example_models.Rmd, including those with chunk options eval = FALSE.
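As a sketch of that loop—assuming a standard devtools setup rather than any stacks-specific tooling—regenerating and then checking might look like:

# after re-running the chunks in man-roxygen/example_models.Rmd:
devtools::load_all()  # reload the package with the regenerated objects
devtools::test()      # confirm the test suite passes against them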

diff --git a/docs/news/index.html b/docs/news/index.html
index c592cb77..b64a6ad6 100644
[regenerated pkgdown HTML; markup-only changes omitted]
diff --git a/docs/package-logo.png b/docs/package-logo.png
new file mode 100644
index 00000000..5b05d35a
Binary files /dev/null and b/docs/package-logo.png differ
diff --git a/docs/pkgdown.css b/docs/pkgdown.css
index 1273238d..e788b18a 100644
--- a/docs/pkgdown.css
+++ b/docs/pkgdown.css
@@ -264,21 +264,16 @@ table {
 
 /* Syntax highlighting ---------------------------------------------------- */
 
-pre {
-  word-wrap: normal;
-  word-break: normal;
-  border: 1px solid #eee;
-}
-
-pre, code {
+pre, pre code {
   background-color: #f8f8f8;
   color: #333;
+  white-space: pre-wrap;
+  word-break: break-all;
+  overflow-wrap: break-word;
 }
 
-pre code {
-  overflow: auto;
-  word-wrap: normal;
-  white-space: pre;
+pre {
+  border: 1px solid #eee;
 }
 
 pre .img {
@@ -305,9 +300,8 @@ a.sourceLine:hover {
 .kw {color: #264D66;} /* keyword */
 .co {color: #888888;} /* comment */
 
-.message { color: black; font-weight: bolder;}
-.error { color: orange; font-weight: bolder;}
-.warning { color: #6A0366; font-weight: bolder;}
+.error {font-weight: bolder;}
+.warning {font-weight: bolder;}
 
 /* Clipboard --------------------------*/
diff --git a/docs/pkgdown.js b/docs/pkgdown.js
index 7e7048fa..956ef70a 100644
--- a/docs/pkgdown.js
+++ b/docs/pkgdown.js
@@ -80,7 +80,7 @@ $(document).ready(function() {
       var copyButton = "";
 
-      $(".examples, div.sourceCode").addClass("hasCopyButton");
+      $("div.sourceCode").addClass("hasCopyButton");
 
       // Insert copy buttons:
       $(copyButton).prependTo(".hasCopyButton");
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index cd3be80d..d6af8151 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -1,10 +1,10 @@
-pandoc: 2.11.2
-pkgdown: 1.6.1
-pkgdown_sha: ~
+pandoc: 2.11.4
+pkgdown: 1.6.1.9001
+pkgdown_sha: ce9781a15c7ea07df9fb17a11295ba4abec0b54b
 articles:
   basics: basics.html
   classification: classification.html
-last_built: 2021-04-19T16:21Z
+last_built: 2021-07-22T20:13Z
 urls:
   reference: https://stacks.tidymodels.org/reference
   article: https://stacks.tidymodels.org/articles
diff --git a/docs/reference/add_candidates.html b/docs/reference/add_candidates.html
index 58521609..91a87aca 100644
[regenerated pkgdown HTML; readable portions follow]
add_candidates(
  data_stack,
  candidates,
  name = deparse(substitute(candidates)),
  ...
)

Arguments

@@ -202,8 +202,8 @@

A tune_results or resample_results object outputted from tune::tune_grid(), tune::tune_bayes(), or tune::fit_resamples(). These results must have been fitted with the control settings save_pred = TRUE, save_workflow = TRUE—see the control_stack_grid(), control_stack_bayes(), and control_stack_resamples() documentation for helper functions.

@@ -268,74 +268,84 @@

See also: stacks()

Examples

# \donttest{
+# see the "Example Data" section above for
+# clarification on the objects used in these examples!
+
+# put together a data stack using
+# tuning results for regression models
+reg_st <- 
+  stacks() %>%
+  add_candidates(reg_res_lr) %>%
+  add_candidates(reg_res_svm) %>%
+  add_candidates(reg_res_sp)
+#> Registered S3 method overwritten by 'tune':
+#>   method                   from   
+#>   required_pkgs.model_spec parsnip
+  
+reg_st
+#> # A data stack with 3 model definitions and 15 candidate members:
+#> #   reg_res_lr: 1 model configuration
+#> #   reg_res_svm: 5 model configurations
+#> #   reg_res_sp: 9 model configurations
+#> # Outcome: latency (numeric)
+  
+# do the same with multinomial classification models
+class_st <-
+  stacks() %>%
+  add_candidates(class_res_nn) %>%
+  add_candidates(class_res_rf)
+  
+class_st
+#> # A data stack with 2 model definitions and 11 candidate members:
+#> #   class_res_nn: 1 model configuration
+#> #   class_res_rf: 10 model configurations
+#> # Outcome: reflex (factor)
+  
+# ...or binomial classification models
+log_st <-
+  stacks() %>%
+  add_candidates(log_res_nn) %>%
+  add_candidates(log_res_rf)
+  
+log_st
+#> # A data stack with 2 model definitions and 11 candidate members:
+#> #   log_res_nn: 1 model configuration
+#> #   log_res_rf: 10 model configurations
+#> # Outcome: hatched (factor)
+  
+# use custom names for each model:
+log_st2 <-
+  stacks() %>%
+  add_candidates(log_res_nn, name = "neural_network") %>%
+  add_candidates(log_res_rf, name = "random_forest")
+  
+log_st2
+#> # A data stack with 2 model definitions and 11 candidate members:
+#> #   neural_network: 1 model configuration
+#> #   random_forest: 10 model configurations
+#> # Outcome: hatched (factor)
+  
+# these objects would likely then be
+# passed to blend_predictions():
+log_st2 %>% blend_predictions()
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 11 possible candidate members, the ensemble retained 2.
+#> Penalty: 0.01.
+#> Mixture: 1.
+#> 
+#> The 2 highest weighted member classes are:
+#> # A tibble: 2 x 3
+#>   member                       type        weight
+#>   <chr>                        <chr>        <dbl>
+#> 1 .pred_yes_neural_network_1_1 mlp           7.08
+#> 2 .pred_yes_random_forest_1_03 rand_forest   3.00
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+# }
+
+
diff --git a/docs/reference/axe_model_stack.html b/docs/reference/axe_model_stack.html
index 1053bb3a..1a953316 100644
[regenerated pkgdown HTML; readable portions follow]
Axing a model_stack.

# S3 method for model_stack
axe_call(x, verbose = FALSE, ...)

# S3 method for model_stack
axe_env(x, verbose = FALSE, ...)

# S3 method for model_stack
axe_fitted(x, verbose = FALSE, ...)

Arguments

@@ -205,35 +205,49 @@

Value

Axed model_stack object.

Examples

# \donttest{
+# build a regression model stack
+st <-
+  stacks() %>%
+  add_candidates(reg_res_lr) %>%
+  add_candidates(reg_res_sp) %>%
+  blend_predictions() %>%
+  fit_members()
+  
+# remove any of the "butcherable"
+# elements individually
+axe_call(st)
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Print methods for butchered model stacks are disabled.
+axe_ctrl(st)
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Print methods for butchered model stacks are disabled.
+axe_data(st)
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Print methods for butchered model stacks are disabled.
+axe_fitted(st)
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Print methods for butchered model stacks are disabled.
+axe_env(st)
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Print methods for butchered model stacks are disabled.
+
+# or do it all at once!
+butchered_st <- butcher(st, verbose = TRUE)
+#>  Memory released: '122,712 B'
+#>  Disabled: `print()`, `summary()`
+
+format(object.size(st))
+#> [1] "9124400 bytes"
+format(object.size(butchered_st))
+#> [1] "4292936 bytes"
+# }
+

diff --git a/docs/reference/blend_predictions.html b/docs/reference/blend_predictions.html
[regenerated pkgdown HTML; readable portions follow]

Arguments

@@ -250,7 +250,7 @@

Details

Note that a regularized linear model is one of many possible learning algorithms that could be used to fit a stacked ensemble model. For implementations of additional ensemble learning algorithms, see h2o::h2o.stackedEnsemble() and SuperLearner::SuperLearner().

Example Data

@@ -299,161 +299,193 @@

See also: stacks()

Examples

+# \donttest{
+# see the "Example Data" section above for
+# clarification on the objects used in these examples!
+
+# put together a data stack
+reg_st <- 
+  stacks() %>%
+  add_candidates(reg_res_lr) %>%
+  add_candidates(reg_res_svm) %>%
+  add_candidates(reg_res_sp)
+  
+reg_st
+#> # A data stack with 3 model definitions and 15 candidate members:
+#> #   reg_res_lr: 1 model configuration
+#> #   reg_res_svm: 5 model configurations
+#> #   reg_res_sp: 9 model configurations
+#> # Outcome: latency (numeric)
+
+# evaluate the data stack
+reg_st %>%
+  blend_predictions()
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 15 possible candidate members, the ensemble retained 4.
+#> Penalty: 0.1.
+#> Mixture: 1.
+#> 
+#> The 4 highest weighted members are:
+#> # A tibble: 4 x 3
+#>   member          type       weight
+#>   <chr>           <chr>       <dbl>
+#> 1 reg_res_svm_1_5 svm_rbf     2.64 
+#> 2 reg_res_svm_1_3 svm_rbf     0.675
+#> 3 reg_res_svm_1_1 svm_rbf     0.302
+#> 4 reg_res_sp_2_1  linear_reg  0.236
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+
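Before settling on one of these blends, it can help to look at how performance varies with the penalty. A minimal sketch, assuming the autoplot() methods that stacks provides for blended model stacks (the type value below is an assumption, not shown in the examples above):

library(ggplot2)

blended <- reg_st %>% blend_predictions(penalty = 10^(-6:-1))
autoplot(blended)                   # performance metrics across penalty values
autoplot(blended, type = "weights") # stacking coefficients for the chosen penalty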
+# include fewer models by proposing higher penalties
+reg_st %>% 
+  blend_predictions(penalty = c(.5, 1))
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 15 possible candidate members, the ensemble retained 4.
+#> Penalty: 1.
+#> Mixture: 1.
+#> 
+#> The 4 highest weighted members are:
+#> # A tibble: 4 x 3
+#>   member          type       weight
+#>   <chr>           <chr>       <dbl>
+#> 1 reg_res_svm_1_5 svm_rbf     1.15 
+#> 2 reg_res_svm_1_3 svm_rbf     0.673
+#> 3 reg_res_svm_1_1 svm_rbf     0.289
+#> 4 reg_res_sp_2_1  linear_reg  0.230
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+
+# allow for negative stacking coefficients 
+# with the non_negative argument
+reg_st %>% 
+  blend_predictions(non_negative = FALSE)
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 15 possible candidate members, the ensemble retained 9.
+#> Penalty: 0.1.
+#> Mixture: 1.
+#> 
+#> The 9 highest weighted members are:
+#> # A tibble: 9 x 3
+#>   member          type       weight
+#>   <chr>           <chr>       <dbl>
+#> 1 reg_res_svm_1_5 svm_rbf     1.44 
+#> 2 reg_res_sp_2_1  linear_reg  1.11 
+#> 3 reg_res_svm_1_3 svm_rbf     1.05 
+#> 4 reg_res_sp_9_1  linear_reg -0.538
+#> 5 reg_res_lr_1_1  linear_reg -0.489
+#> 6 reg_res_sp_8_1  linear_reg  0.467
+#> 7 reg_res_svm_1_1 svm_rbf     0.400
+#> 8 reg_res_sp_4_1  linear_reg -0.367
+#> 9 reg_res_sp_1_1  linear_reg -0.288
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+  
+# use a custom metric in tuning the lasso penalty
+library(yardstick)
+reg_st %>% 
+  blend_predictions(metric = metric_set(rmse))
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 15 possible candidate members, the ensemble retained 4.
+#> Penalty: 0.1.
+#> Mixture: 1.
+#> 
+#> The 4 highest weighted members are:
+#> # A tibble: 4 x 3
+#>   member          type       weight
+#>   <chr>           <chr>       <dbl>
+#> 1 reg_res_svm_1_5 svm_rbf     2.64 
+#> 2 reg_res_svm_1_3 svm_rbf     0.675
+#> 3 reg_res_svm_1_1 svm_rbf     0.302
+#> 4 reg_res_sp_2_1  linear_reg  0.236
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+  
+# pass control options for stack blending
+reg_st %>% 
+  blend_predictions(
+    control = tune::control_grid(allow_par = TRUE)
+  )
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 15 possible candidate members, the ensemble retained 4.
+#> Penalty: 1e-06.
+#> Mixture: 1.
+#> 
+#> The 4 highest weighted members are:
+#> # A tibble: 4 x 3
+#>   member          type       weight
+#>   <chr>           <chr>       <dbl>
+#> 1 reg_res_svm_1_5 svm_rbf     2.80 
+#> 2 reg_res_svm_1_3 svm_rbf     0.674
+#> 3 reg_res_svm_1_1 svm_rbf     0.304
+#> 4 reg_res_sp_2_1  linear_reg  0.237
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+  
+# the process looks the same with 
+# multinomial classification models
+class_st <-
+  stacks() %>%
+  add_candidates(class_res_nn) %>%
+  add_candidates(class_res_rf) %>%
+  blend_predictions()
+#> ! Bootstrap01: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -61); ...
+#> ! Bootstrap05: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -76); ...
+#> ! Bootstrap10: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -76); ...
+#> ! Bootstrap12: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -82); ...
+#> ! Bootstrap14: internal: No observations were detected in `truth` for level(s): 'low', ...
+#> ! Bootstrap19: internal: No observations were detected in `truth` for level(s): 'low', ...
+  
+class_st
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 22 possible candidate members, the ensemble retained 5.
+#> Penalty: 0.1.
+#> Mixture: 1.
+#> Across the 3 classes, there are an average of 2.5 coefficients per class.
+#> 
+#> The 5 highest weighted member classes are:
+#> # A tibble: 5 x 4
+#>   member                       type         weight class
+#>   <chr>                        <chr>         <dbl> <chr>
+#> 1 .pred_full_class_res_nn_1_1  mlp         8.26    full 
+#> 2 .pred_full_class_res_rf_1_03 rand_forest 1.30    full 
+#> 3 .pred_mid_class_res_rf_1_01  rand_forest 0.993   mid  
+#> 4 .pred_full_class_res_rf_1_05 rand_forest 0.0267  full 
+#> 5 .pred_full_class_res_rf_1_08 rand_forest 0.00747 full 
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+
+# ...or binomial classification models
+log_st <-
+  stacks() %>%
+  add_candidates(log_res_nn) %>%
+  add_candidates(log_res_rf) %>%
+  blend_predictions()
+  
+log_st
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 11 possible candidate members, the ensemble retained 3.
+#> Penalty: 1e-06.
+#> Mixture: 1.
+#> 
+#> The 3 highest weighted member classes are:
+#> # A tibble: 3 x 3
+#>   member                    type        weight
+#>   <chr>                     <chr>        <dbl>
+#> 1 .pred_yes_log_res_nn_1_1  mlp          7.37 
+#> 2 .pred_yes_log_res_rf_1_03 rand_forest  3.29 
+#> 3 .pred_yes_log_res_rf_1_06 rand_forest  0.124
+#> 
+#> Members have not yet been fitted with `fit_members()`.
+# }
+
+
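Since the meta-learner is an elastic net, the penalty and mixture arguments can also be tuned jointly; a minimal sketch with illustrative grids:

reg_st %>%
  blend_predictions(
    penalty = 10^(-6:-1),
    mixture = c(0, 0.5, 1)
  )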

Arguments

@@ -217,6 +217,8 @@ Contents

diff --git a/docs/reference/collect_parameters.html b/docs/reference/collect_parameters.html
index 4ede9292..22db0dc0 100644
--- a/docs/reference/collect_parameters.html
+++ b/docs/reference/collect_parameters.html
-collect_parameters(stack, candidates, ...)
+collect_parameters(stack, candidates, ...)
 
 # S3 method for default
 collect_parameters(stack, candidates, ...)
@@ -171,7 +171,7 @@ Collect candidate parameters and stacking coefficients

 collect_parameters(stack, candidates, ...)
 
 # S3 method for model_stack
-collect_parameters(stack, candidates, ...)
+collect_parameters(stack, candidates, ...)

Arguments

@@ -238,66 +238,70 @@

the source code that generated them.

Examples

-# \donttest{
-# see the "Example Data" section above for
-# clarification on the objects used in these examples!
-
-# put together a data stack using
-# tuning results for regression models
-reg_st <- 
-  stacks() %>%
-  add_candidates(reg_res_lr) %>%
-  add_candidates(reg_res_svm) %>%
-  add_candidates(reg_res_sp, "spline")
-
-reg_st
-
-#> # A data stack with 3 model definitions and 15 candidate members:
-#> #   reg_res_lr: 1 model configuration
-#> #   reg_res_svm: 5 model configurations
-#> #   spline: 9 model configurations
-#> # Outcome: latency (numeric)
-
-# check out the hyperparameters for some of the candidates
-collect_parameters(reg_st, "reg_res_svm")
-
-#> # A tibble: 5 x 3
-#>   member              cost rbf_sigma
-#>   <chr>              <dbl>     <dbl>
-#> 1 reg_res_svm_1_1 17.2      1.87e- 1
-#> 2 reg_res_svm_1_2  0.00129  1.28e- 7
-#> 3 reg_res_svm_1_3  3.26     2.54e- 3
-#> 4 reg_res_svm_1_4  0.111    5.19e-10
-#> 5 reg_res_svm_1_5  0.0241   4.02e- 5
-
-collect_parameters(reg_st, "spline")
-
-#> # A tibble: 9 x 2
-#>   member       age
-#>   <chr>      <int>
-#> 1 spline_1_1     8
-#> 2 spline_2_1    14
-#> 3 spline_3_1     5
-#> 4 spline_4_1    13
-#> 5 spline_5_1     3
-#> 6 spline_6_1     6
-#> 7 spline_7_1    10
-#> 8 spline_8_1     2
-#> 9 spline_9_1    12
-
-# blend the data stack to view the hyperparameters 
-# along with the stacking coefficients!
-collect_parameters(
-  reg_st %>% blend_predictions(), 
-  "spline"
-)
-
-#> # A tibble: 9 x 3
-#>   member       age   coef
-#>   <chr>      <int>  <dbl>
-#> 1 spline_1_1     8 0     
-#> 2 spline_2_1    14 0.0480
-#> 3 spline_3_1     5 0     
-#> 4 spline_4_1    13 0.261 
-#> 5 spline_5_1     3 0     
-#> 6 spline_6_1     6 0     
-#> 7 spline_7_1    10 0     
-#> 8 spline_8_1     2 0     
-#> 9 spline_9_1    12 0.0860
-# }
-
+
+# \donttest{
+# see the "Example Data" section above for
+# clarification on the objects used in these examples!
+
+# put together a data stack using
+# tuning results for regression models
+reg_st <- 
+  stacks() %>%
+  add_candidates(reg_res_lr) %>%
+  add_candidates(reg_res_svm) %>%
+  add_candidates(reg_res_sp, "spline")
+  
+reg_st
+#> # A data stack with 3 model definitions and 15 candidate members:
+#> #   reg_res_lr: 1 model configuration
+#> #   reg_res_svm: 5 model configurations
+#> #   spline: 9 model configurations
+#> # Outcome: latency (numeric)
+  
+# check out the hyperparameters for some of the candidates
+collect_parameters(reg_st, "reg_res_svm")
+#> # A tibble: 5 x 3
+#>   member              cost rbf_sigma
+#>   <chr>              <dbl>     <dbl>
+#> 1 reg_res_svm_1_1 17.2      1.87e- 1
+#> 2 reg_res_svm_1_2  0.00129  1.28e- 7
+#> 3 reg_res_svm_1_3  3.26     2.54e- 3
+#> 4 reg_res_svm_1_4  0.111    5.19e-10
+#> 5 reg_res_svm_1_5  0.0241   4.02e- 5
+
+collect_parameters(reg_st, "spline")
+#> # A tibble: 9 x 2
+#>   member       age
+#>   <chr>      <int>
+#> 1 spline_1_1     8
+#> 2 spline_2_1    14
+#> 3 spline_3_1     5
+#> 4 spline_4_1    13
+#> 5 spline_5_1     3
+#> 6 spline_6_1     6
+#> 7 spline_7_1    10
+#> 8 spline_8_1     2
+#> 9 spline_9_1    12
+
+# blend the data stack to view the hyperparameters 
+# along with the stacking coefficients!
+collect_parameters(
+  reg_st %>% blend_predictions(), 
+  "spline"
+)
+#> # A tibble: 9 x 3
+#>   member       age  coef
+#>   <chr>      <int> <dbl>
+#> 1 spline_1_1     8 0    
+#> 2 spline_2_1    14 0.237
+#> 3 spline_3_1     5 0    
+#> 4 spline_4_1    13 0    
+#> 5 spline_5_1     3 0    
+#> 6 spline_6_1     6 0    
+#> 7 spline_7_1    10 0    
+#> 8 spline_8_1     2 0    
+#> 9 spline_9_1    12 0    
+# }
+
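collect_parameters() also has a method for fitted model stacks, so the same lookup works after the members are fitted; a minimal sketch (fitted_st is a hypothetical name):

fitted_st <-
  reg_st %>%
  blend_predictions() %>%
  fit_members()

collect_parameters(fitted_st, "spline")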

Value

@@ -214,6 +214,8 @@ Contents

diff --git a/docs/reference/example_data.html b/docs/reference/example_data.html
index fde1b290..2b0f4b81 100644
--- a/docs/reference/example_data.html
+++ b/docs/reference/example_data.html
-reg_res_svm
+reg_res_svm
 
 reg_res_sp
 
@@ -174,7 +174,7 @@ Example Objects

 log_res_nn
 
-log_res_rf
+log_res_rf

Format

@@ -191,8 +191,7 @@ Format

Source

 Julie Jung et al. (2020) Multimodal mechanosensing enables treefrog
-embryos to escape egg-predators.
-https://doi.org/10.1242/jeb.236141
+embryos to escape egg-predators. doi: 10.1242/jeb.236141

Details

 Red-eyed tree frog (RETF) embryos can hatch earlier than their normal
@@ -227,7 +226,7 @@ Details

 fitting hatched (whether or not the embryos hatched in response to the
 stimulus) using most all of the other variables as predictors.

The source code for generating these objects is given below.

-# setup: packages, data, resample, basic recipe ------------------------
+# setup: packages, data, resample, basic recipe ------------------------
 library(stacks)
 library(tune)
 library(rsample)
@@ -253,9 +252,9 @@ Details
 
 # for regression, predict latency to hatch (excluding NAs)
 tree_frogs_reg <- 
-  tree_frogs %>%
-  filter(!is.na(latency)) %>%
-  select(-clutch, -hatched)
+  tree_frogs %>%
+  filter(!is.na(latency)) %>%
+  select(-clutch, -hatched)
 
 set.seed(1)
 tree_frogs_reg_split <- rsample::initial_split(tree_frogs_reg)
@@ -270,20 +269,20 @@ Details
 
 reg_folds <- rsample::vfold_cv(tree_frogs_reg_train, v = 5)
 
 tree_frogs_reg_rec <- 
-  recipes::recipe(latency ~ ., data = tree_frogs_reg_train) %>%
-  recipes::step_dummy(recipes::all_nominal()) %>%
+  recipes::recipe(latency ~ ., data = tree_frogs_reg_train) %>%
+  recipes::step_dummy(recipes::all_nominal()) %>%
   recipes::step_zv(recipes::all_predictors())
 
 metric <- yardstick::metric_set(yardstick::rmse)
 
 # linear regression ---------------------------------------
 lin_reg_spec <- 
-  parsnip::linear_reg() %>%
+  parsnip::linear_reg() %>%
   parsnip::set_engine("lm")
 
 reg_wf_lr <- 
-  workflows::workflow() %>%
-  workflows::add_model(lin_reg_spec) %>%
+  workflows::workflow() %>%
+  workflows::add_model(lin_reg_spec) %>%
   workflows::add_recipe(tree_frogs_reg_rec)
 
 set.seed(1)
@@ -300,13 +299,13 @@ Details
 
   parsnip::svm_rbf(
     cost = tune::tune(),
     rbf_sigma = tune::tune()
-  ) %>%
-  parsnip::set_engine("kernlab") %>%
+  ) %>%
+  parsnip::set_engine("kernlab") %>%
   parsnip::set_mode("regression")
 
 reg_wf_svm <- 
-  workflows::workflow() %>%
-  workflows::add_model(svm_spec) %>%
+  workflows::workflow() %>%
+  workflows::add_model(svm_spec) %>%
   workflows::add_recipe(tree_frogs_reg_rec)
 
 set.seed(1)
@@ -320,12 +319,12 @@ Details
 
 # spline regression ---------------------------------------
 spline_rec <- 
-  tree_frogs_reg_rec %>%
+  tree_frogs_reg_rec %>%
   recipes::step_ns(age, deg_free = tune::tune("age"))
 
 reg_wf_sp <- 
-  workflows::workflow() %>%
-  workflows::add_model(lin_reg_spec) %>%
+  workflows::workflow() %>%
+  workflows::add_model(lin_reg_spec) %>%
   workflows::add_recipe(spline_rec)
 
 set.seed(1)
@@ -339,7 +338,7 @@ Details
 
 # classification - preliminaries -----------------------------------
 tree_frogs_class <- 
-  tree_frogs %>%
+  tree_frogs %>%
   dplyr::select(-c(clutch, latency))
 
 set.seed(1)
@@ -355,9 +354,9 @@ Details
 
 class_folds <- rsample::vfold_cv(tree_frogs_class_train, v = 5)
 
 tree_frogs_class_rec <- 
-  recipes::recipe(reflex ~ ., data = tree_frogs_class_train) %>%
-  recipes::step_dummy(recipes::all_nominal(), -reflex) %>%
-  recipes::step_zv(recipes::all_predictors()) %>%
+  recipes::recipe(reflex ~ ., data = tree_frogs_class_train) %>%
+  recipes::step_dummy(recipes::all_nominal(), -reflex) %>%
+  recipes::step_zv(recipes::all_predictors()) %>%
   recipes::step_normalize(recipes::all_numeric())
 
 # random forest classification --------------------------------------
@@ -366,13 +365,13 @@ Details
 
     mtry = tune::tune(),
     trees = 500,
     min_n = tune::tune()
-  ) %>%
-  parsnip::set_mode("classification") %>%
+  ) %>%
+  parsnip::set_mode("classification") %>%
   parsnip::set_engine("ranger")
 
 class_wf_rf <- 
-  workflows::workflow() %>%
-  workflows::add_recipe(tree_frogs_class_rec) %>%
+  workflows::workflow() %>%
+  workflows::add_recipe(tree_frogs_class_rec) %>%
   workflows::add_model(rand_forest_spec)
 
 set.seed(1)
@@ -386,13 +385,13 @@ Details
 
 # neural network classification -------------------------------------
 nnet_spec <- 
-  mlp(hidden_units = 5, penalty = 0.01, epochs = 100) %>%
-  set_mode("classification") %>%
+  mlp(hidden_units = 5, penalty = 0.01, epochs = 100) %>%
+  set_mode("classification") %>%
   set_engine("nnet")
 
 class_wf_nn <- 
-  workflows::workflow() %>%
-  workflows::add_recipe(tree_frogs_class_rec) %>%
+  workflows::workflow() %>%
+  workflows::add_recipe(tree_frogs_class_rec) %>%
   workflows::add_model(nnet_spec)
 
 set.seed(1)
@@ -405,9 +404,9 @@ Details
 
 # binary classification --------------------------------
 tree_frogs_2_class_rec <- 
-  recipes::recipe(hatched ~ ., data = tree_frogs_class_train) %>%
-  recipes::step_dummy(recipes::all_nominal(), -hatched) %>%
-  recipes::step_zv(recipes::all_predictors()) %>%
+  recipes::recipe(hatched ~ ., data = tree_frogs_class_train) %>%
+  recipes::step_dummy(recipes::all_nominal(), -hatched) %>%
+  recipes::step_zv(recipes::all_predictors()) %>%
   recipes::step_normalize(recipes::all_numeric())
 
 set.seed(1)
@@ -416,13 +415,13 @@ Details
 
     mtry = tune(),
     trees = 500,
     min_n = tune()
-  ) %>%
-  parsnip::set_mode("classification") %>%
+  ) %>%
+  parsnip::set_mode("classification") %>%
   parsnip::set_engine("ranger")
 
 log_wf_rf <- 
-  workflows::workflow() %>%
-  workflows::add_recipe(tree_frogs_2_class_rec) %>%
+  workflows::workflow() %>%
+  workflows::add_recipe(tree_frogs_2_class_rec) %>%
   workflows::add_model(rand_forest_spec_2)
 
 set.seed(1)
@@ -435,13 +434,13 @@ Details
 
   )
 
 nnet_spec_2 <- 
-  parsnip::mlp(epochs = 100, hidden_units = 5, penalty = 0.1) %>%
-  parsnip::set_mode("classification") %>%
+  parsnip::mlp(epochs = 100, hidden_units = 5, penalty = 0.1) %>%
+  parsnip::set_mode("classification") %>%
   parsnip::set_engine("nnet", verbose = 0)
 
 log_wf_nn <- 
-  workflows::workflow() %>%
-  workflows::add_recipe(tree_frogs_2_class_rec) %>%
+  workflows::workflow() %>%
+  workflows::add_recipe(tree_frogs_2_class_rec) %>%
   workflows::add_model(nnet_spec_2)
 
 set.seed(1)
@@ -451,7 +450,7 @@ Details
 
     resamples = class_folds,
     control = ctrl_res
   )
-
+
@@ -237,93 +237,80 @@ See also

 stacks()

Examples

-# \donttest{
-# see the "Example Data" section above for
-# clarification on the objects used in these examples!
-
-# put together a data stack
-reg_st <- 
-  stacks() %>%
-  add_candidates(reg_res_lr) %>%
-  add_candidates(reg_res_svm) %>%
-  add_candidates(reg_res_sp)
-
-reg_st
-
-#> # A data stack with 3 model definitions and 15 candidate members:
-#> #   reg_res_lr: 1 model configuration
-#> #   reg_res_svm: 5 model configurations
-#> #   reg_res_sp: 9 model configurations
-#> # Outcome: latency (numeric)
-
-# evaluate the data stack and fit the member models
-reg_st %>%
-  blend_predictions() %>%
-  fit_members()
-
-#> ── A stacked ensemble model ─────────────────────────────────────
-#> 
-#> Out of 15 possible candidate members, the ensemble retained 5.
-#> Penalty: 0.1.
-#> Mixture: 1.
-#> 
-#> The 5 highest weighted members are:
-#> # A tibble: 5 x 3
-#>   member          type       weight
-#>   <chr>           <chr>       <dbl>
-#> 1 reg_res_svm_1_1 svm_rbf    0.442 
-#> 2 reg_res_svm_1_3 svm_rbf    0.265 
-#> 3 reg_res_sp_4_1  linear_reg 0.261 
-#> 4 reg_res_sp_9_1  linear_reg 0.0860
-#> 5 reg_res_sp_2_1  linear_reg 0.0480
-
-reg_st
-
-#> # A data stack with 3 model definitions and 15 candidate members:
-#> #   reg_res_lr: 1 model configuration
-#> #   reg_res_svm: 5 model configurations
-#> #   reg_res_sp: 9 model configurations
-#> # Outcome: latency (numeric)
-
-# do the same with multinomial classification models
-class_st <-
-  stacks() %>%
-  add_candidates(class_res_nn) %>%
-  add_candidates(class_res_rf) %>%
-  blend_predictions() %>%
-  fit_members()
-
-#> ! Bootstrap05: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -59); ...
-#> ! Bootstrap19: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -86); ...
-#> ! Bootstrap22: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -61); ...
-
-class_st
-
-#> ── A stacked ensemble model ─────────────────────────────────────
-#> 
-#> Out of 22 possible candidate members, the ensemble retained 10.
-#> Penalty: 0.001.
-#> Mixture: 1.
-#> Across the 3 classes, there are an average of 3.33 coefficients per class.
-#> 
-#> The 10 highest weighted member classes are:
-#> # A tibble: 10 x 4
-#>    member                       type        weight class
-#>    <chr>                        <chr>        <dbl> <chr>
-#>  1 .pred_full_class_res_nn_1_1  mlp         28.8   full 
-#>  2 .pred_mid_class_res_rf_1_01  rand_forest 10.9   mid  
-#>  3 .pred_mid_class_res_nn_1_1   mlp          7.82  mid  
-#>  4 .pred_mid_class_res_rf_1_04  rand_forest  5.76  low  
-#>  5 .pred_mid_class_res_rf_1_08  rand_forest  5.53  low  
-#>  6 .pred_mid_class_res_rf_1_07  rand_forest  4.48  low  
-#>  7 .pred_mid_class_res_rf_1_05  rand_forest  1.80  mid  
-#>  8 .pred_mid_class_res_rf_1_10  rand_forest  1.36  mid  
-#>  9 .pred_mid_class_res_rf_1_02  rand_forest  0.552 low  
-#> 10 .pred_full_class_res_rf_1_04 rand_forest  0.284 mid  
-
-# ...or binomial classification models
-log_st <-
-  stacks() %>%
-  add_candidates(log_res_nn) %>%
-  add_candidates(log_res_rf) %>%
-  blend_predictions() %>%
-  fit_members()
-
-log_st
-
-#> ── A stacked ensemble model ─────────────────────────────────────
-#> 
-#> Out of 11 possible candidate members, the ensemble retained 4.
-#> Penalty: 1e-05.
-#> Mixture: 1.
-#> 
-#> The 4 highest weighted member classes are:
-#> # A tibble: 4 x 3
-#>   member                    type        weight
-#>   <chr>                     <chr>        <dbl>
-#> 1 .pred_yes_log_res_nn_1_1  mlp         6.09 
-#> 2 .pred_yes_log_res_rf_1_09 rand_forest 1.87 
-#> 3 .pred_yes_log_res_rf_1_05 rand_forest 1.45 
-#> 4 .pred_yes_log_res_rf_1_06 rand_forest 0.842
-# }
-
-
+
+# \donttest{
+# see the "Example Data" section above for
+# clarification on the objects used in these examples!
+
+# put together a data stack
+reg_st <- 
+  stacks() %>%
+  add_candidates(reg_res_lr) %>%
+  add_candidates(reg_res_svm) %>%
+  add_candidates(reg_res_sp)
+  
+reg_st
+#> # A data stack with 3 model definitions and 15 candidate members:
+#> #   reg_res_lr: 1 model configuration
+#> #   reg_res_svm: 5 model configurations
+#> #   reg_res_sp: 9 model configurations
+#> # Outcome: latency (numeric)
+
+# evaluate the data stack and fit the member models
+reg_st %>%
+  blend_predictions() %>%
+  fit_members()
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 15 possible candidate members, the ensemble retained 4.
+#> Penalty: 0.1.
+#> Mixture: 1.
+#> 
+#> The 4 highest weighted members are:
+#> # A tibble: 4 x 3
+#>   member          type       weight
+#>   <chr>           <chr>       <dbl>
+#> 1 reg_res_svm_1_5 svm_rbf     2.64 
+#> 2 reg_res_svm_1_3 svm_rbf     0.675
+#> 3 reg_res_svm_1_1 svm_rbf     0.302
+#> 4 reg_res_sp_2_1  linear_reg  0.236
+  
+reg_st
+#> # A data stack with 3 model definitions and 15 candidate members:
+#> #   reg_res_lr: 1 model configuration
+#> #   reg_res_svm: 5 model configurations
+#> #   reg_res_sp: 9 model configurations
+#> # Outcome: latency (numeric)
+  
+# do the same with multinomial classification models
+class_st <-
+  stacks() %>%
+  add_candidates(class_res_nn) %>%
+  add_candidates(class_res_rf) %>%
+  blend_predictions() %>%
+  fit_members()
+#> ! Bootstrap05: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -89); ...
+#> ! Bootstrap18: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -63); ...
+#> ! Bootstrap23: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -58); ...
+#> ! Bootstrap24: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -79); ...
+#> Error in {    asNamespace("stacks")$fit_member(name = mem, wflows = model_stack[["model_defs"]],         members_map = members_map, train_dat = dat)} task 2 failed - "This engine requires some package installs: 'ranger'"
+  
+class_st
+#> Error in eval(expr, envir, enclos) object 'class_st' not found
+  
+# ...or binomial classification models
+log_st <-
+  stacks() %>%
+  add_candidates(log_res_nn) %>%
+  add_candidates(log_res_rf) %>%
+  blend_predictions() %>%
+  fit_members()
+#> Error in {    asNamespace("stacks")$fit_member(name = mem, wflows = model_stack[["model_defs"]],         members_map = members_map, train_dat = dat)} task 2 failed - "This engine requires some package installs: 'ranger'"
+  
+log_st
+#> Error in eval(expr, envir, enclos) object 'log_st' not found
+# }
+
+
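The classification examples above error because fit_members() refits each retained candidate on the full training set, so every member's engine package must be installed; here the random forest members need {ranger}. A minimal sketch of the fix, assuming installation from CRAN:

# install the missing engine package, then refit
install.packages("ranger")

class_st <-
  stacks() %>%
  add_candidates(class_res_nn) %>%
  add_candidates(class_res_rf) %>%
  blend_predictions() %>%
  fit_members()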

Arguments

@@ -211,6 +211,8 @@ Contents

diff --git a/docs/reference/index.html b/docs/reference/index.html
index ae60d65c..3c3a70a9 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -323,6 +323,8 @@ Contents

diff --git a/docs/reference/predict.data_stack.html b/docs/reference/predict.data_stack.html
index d3219d26..4a1d7a9a 100644
--- a/docs/reference/predict.data_stack.html
+++ b/docs/reference/predict.data_stack.html
-# S3 method for data_stack
-predict(object, ...)
+# S3 method for data_stack
+predict(object, ...)

Arguments

Obtain prediction equations for all possible values of type

@@ -202,6 +202,8 @@ Contents

diff --git a/docs/reference/predict.model_stack.html b/docs/reference/predict.model_stack.html
index bed28e35..6ae0f38b 100644
--- a/docs/reference/predict.model_stack.html
+++ b/docs/reference/predict.model_stack.html
-# S3 method for model_stack
-predict(object, new_data, type = NULL, members = FALSE, opts = list(), ...)
+# S3 method for model_stack
+predict(object, new_data, type = NULL, members = FALSE, opts = list(), ...)

Arguments

@@ -234,158 +234,107 @@

the source code that generated them.

Examples

-# \donttest{
-# see the "Example Data" section above for
-# clarification on the data and tuning results
-# objects used in these examples!
-
-data(tree_frogs_reg_test)
-data(tree_frogs_class_test)
-
-# build and fit a regression model stack
-reg_st <-
-  stacks() %>%
-  add_candidates(reg_res_lr) %>%
-  add_candidates(reg_res_sp) %>%
-  blend_predictions() %>%
-  fit_members()
-
-reg_st
-
-#> ── A stacked ensemble model ─────────────────────────────────────
-#> 
-#> Out of 10 possible candidate members, the ensemble retained 5.
-#> Penalty: 1e-06.
-#> Mixture: 1.
-#> 
-#> The 5 highest weighted members are:
-#> # A tibble: 5 x 3
-#>   member         type       weight
-#>   <chr>          <chr>       <dbl>
-#> 1 reg_res_sp_2_1 linear_reg 0.346 
-#> 2 reg_res_lr_1_1 linear_reg 0.238 
-#> 3 reg_res_sp_4_1 linear_reg 0.224 
-#> 4 reg_res_sp_8_1 linear_reg 0.101 
-#> 5 reg_res_sp_9_1 linear_reg 0.0537
-
-# predict on the tree frogs testing data
-predict(reg_st, tree_frogs_reg_test)
-
-#> # A tibble: 143 x 1
-#>    .pred
-#>    <dbl>
-#>  1 115. 
-#>  2  31.7
-#>  3  93.7
-#>  4 122. 
-#>  5 167. 
-#>  6  95.2
-#>  7 125. 
-#>  8 222. 
-#>  9 167. 
-#> 10 156. 
-#> # … with 133 more rows
-
-# include the predictions from the members
-predict(reg_st, tree_frogs_reg_test, members = TRUE)
-
-#> # A tibble: 143 x 6
-#>    .pred reg_res_lr_1_1 reg_res_sp_8_1 reg_res_sp_9_1 reg_res_sp_4_1
-#>    <dbl>          <dbl>          <dbl>          <dbl>          <dbl>
-#>  1 115.            117.           101.           116.           118. 
-#>  2  31.7            34.2           26.7           27.8           27.5
-#>  3  93.7           111.           107.            84.1           83.2
-#>  4 122.            106.           112.           135.           132. 
-#>  5 167.            147.           161.           185.           178. 
-#>  6  95.2            85.7           98.5           98.8          102. 
-#>  7 125.            102.           115.           141.           135. 
-#>  8 222.            224.           210.           229.           231. 
-#>  9 167.            147.           160.           185.           179. 
-#> 10 156.            153.           156.           157.           161. 
-#> # … with 133 more rows, and 1 more variable: reg_res_sp_2_1 <dbl>
-
-# build and fit a classification model stack
-class_st <-
-  stacks() %>%
-  add_candidates(class_res_nn) %>%
-  add_candidates(class_res_rf) %>%
-  blend_predictions() %>%
-  fit_members()
-
-#> ! Bootstrap08: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -73); ...
-#> ! Bootstrap14: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -76); ...
-#> ! Bootstrap24: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -92); ...
-
-class_st
-
-#> ── A stacked ensemble model ─────────────────────────────────────
-#> 
-#> Out of 22 possible candidate members, the ensemble retained 10.
-#> Penalty: 0.001.
-#> Mixture: 1.
-#> Across the 3 classes, there are an average of 3.33 coefficients per class.
-#> 
-#> The 10 highest weighted member classes are:
-#> # A tibble: 10 x 4
-#>    member                       type        weight class
-#>    <chr>                        <chr>        <dbl> <chr>
-#>  1 .pred_full_class_res_nn_1_1  mlp         28.8   full 
-#>  2 .pred_mid_class_res_rf_1_01  rand_forest 10.9   mid  
-#>  3 .pred_mid_class_res_nn_1_1   mlp          7.82  mid  
-#>  4 .pred_mid_class_res_rf_1_04  rand_forest  5.76  low  
-#>  5 .pred_mid_class_res_rf_1_08  rand_forest  5.53  low  
-#>  6 .pred_mid_class_res_rf_1_07  rand_forest  4.48  low  
-#>  7 .pred_mid_class_res_rf_1_05  rand_forest  1.80  mid  
-#>  8 .pred_mid_class_res_rf_1_10  rand_forest  1.36  mid  
-#>  9 .pred_mid_class_res_rf_1_02  rand_forest  0.552 low  
-#> 10 .pred_full_class_res_rf_1_04 rand_forest  0.284 mid  
-
-# predict reflex, first as a class, then as
-# class probabilities
-predict(class_st, tree_frogs_class_test)
-
-#> # A tibble: 303 x 1
-#>    .pred_class
-#>    <fct>      
-#>  1 full
-#>  2 low
-#>  3 low
-#>  4 full
-#>  5 low
-#>  6 mid
-#>  7 low
-#>  8 full
-#>  9 full
-#> 10 low
-#> # … with 293 more rows
-
-predict(class_st, tree_frogs_class_test, type = "prob")
-
-#> # A tibble: 303 x 3
-#>    .pred_full   .pred_low .pred_mid
-#>         <dbl>       <dbl>     <dbl>
-#>  1 0.000000592  0.269     0.731   
-#>  2 0.999        0.000733  0.000223
-#>  3 0.999        0.000595  0.000227
-#>  4 0.00000156   0.256     0.744   
-#>  5 0.999        0.000705  0.000223
-#>  6 0.000308     0.863     0.137   
-#>  7 0.999        0.000595  0.000227
-#>  8 0.0000000651 0.143     0.857   
-#>  9 0.00000449   0.343     0.657   
-#> 10 0.999        0.000595  0.000227
-#> # … with 293 more rows
-
-# returning the member predictions as well
-predict(
-  class_st, 
-  tree_frogs_class_test, 
-  type = "prob", 
-  members = TRUE
-)
-
-#> # A tibble: 303 x 27
-#>    .pred_full .pred_low .pred_mid .pred_low_class_res_r… .pred_low_class_res_…
-#>         <dbl>     <dbl>     <dbl>                  <dbl>                 <dbl>
-#>  1 0.000000592  0.269    0.731                    0.468              0.313    
-#>  2 0.999        0.000733 0.000223                 0.102              0.000619 
-#>  3 0.999        0.000595 0.000227                 0.0458             0.0000355
-#>  4 0.00000156   0.256    0.744                    0.509              0.444    
-#>  5 0.999        0.000705 0.000223                 0.109              0        
-#>  6 0.000308     0.863    0.137                    0.519              0.603    
-#>  7 0.999        0.000595 0.000227                 0.0458             0.0000355
-#>  8 0.0000000651 0.143    0.857                    0.283              0.167    
-#>  9 0.00000449   0.343    0.657                    0.509              0.486    
-#> 10 0.999        0.000595 0.000227                 0.0458             0.0000355
-#> # … with 293 more rows, and 22 more variables:
-#> #   .pred_low_class_res_rf_1_08 <dbl>, .pred_low_class_res_rf_1_07 <dbl>,
-#> #   .pred_low_class_res_nn_1_1 <dbl>, .pred_low_class_res_rf_1_10 <dbl>,
-#> #   .pred_low_class_res_rf_1_05 <dbl>, .pred_low_class_res_rf_1_01 <dbl>,
-#> #   .pred_mid_class_res_rf_1_04 <dbl>, .pred_mid_class_res_rf_1_02 <dbl>,
-#> #   .pred_mid_class_res_rf_1_08 <dbl>, .pred_mid_class_res_rf_1_07 <dbl>,
-#> #   .pred_mid_class_res_nn_1_1 <dbl>, .pred_mid_class_res_rf_1_10 <dbl>,
-#> #   .pred_mid_class_res_rf_1_05 <dbl>, .pred_mid_class_res_rf_1_01 <dbl>,
-#> #   .pred_full_class_res_rf_1_04 <dbl>, .pred_full_class_res_rf_1_02 <dbl>,
-#> #   .pred_full_class_res_rf_1_08 <dbl>, .pred_full_class_res_rf_1_07 <dbl>,
-#> #   .pred_full_class_res_nn_1_1 <dbl>, .pred_full_class_res_rf_1_10 <dbl>,
-#> #   .pred_full_class_res_rf_1_05 <dbl>, .pred_full_class_res_rf_1_01 <dbl>
-# }
-
-
+
+# \donttest{
+# see the "Example Data" section above for
+# clarification on the data and tuning results
+# objects used in these examples!
+
+data(tree_frogs_reg_test)
+data(tree_frogs_class_test)
+
+# build and fit a regression model stack
+reg_st <-
+  stacks() %>%
+  add_candidates(reg_res_lr) %>%
+  add_candidates(reg_res_sp) %>%
+  blend_predictions() %>%
+  fit_members()
+
+reg_st
+#> ── A stacked ensemble model ─────────────────────────────────────
+#> 
+#> Out of 10 possible candidate members, the ensemble retained 3.
+#> Penalty: 0.1.
+#> Mixture: 1.
+#> 
+#> The 3 highest weighted members are:
+#> # A tibble: 3 x 3
+#>   member         type       weight
+#>   <chr>          <chr>       <dbl>
+#> 1 reg_res_lr_1_1 linear_reg  0.349
+#> 2 reg_res_sp_2_1 linear_reg  0.303
+#> 3 reg_res_sp_8_1 linear_reg  0.267
+
+# predict on the tree frogs testing data
+predict(reg_st, tree_frogs_reg_test)
+#> # A tibble: 143 x 1
+#>    .pred
+#>    <dbl>
+#>  1  40.4
+#>  2 111. 
+#>  3  90.6
+#>  4  33.8
+#>  5  75.3
+#>  6  90.0
+#>  7 122. 
+#>  8  82.4
+#>  9  37.6
+#> 10  77.3
+#> # … with 133 more rows
+
+# include the predictions from the members
+predict(reg_st, tree_frogs_reg_test, members = TRUE)
+#> # A tibble: 143 x 4
+#>    .pred reg_res_lr_1_1 reg_res_sp_8_1 reg_res_sp_2_1
+#>    <dbl>          <dbl>          <dbl>          <dbl>
+#>  1  40.4           38.0           34.3           36.9
+#>  2 111.           124.           117.            98.6
+#>  3  90.6           84.5           92.4           98.2
+#>  4  33.8           35.3           28.0           23.8
+#>  5  75.3           79.0           77.0           67.6
+#>  6  90.0           83.7           92.2           97.3
+#>  7 122.           118.           139.           123. 
+#>  8  82.4           80.0           79.2           88.0
+#>  9  37.6           36.5           30.7           32.7
+#> 10  77.3           79.4           78.0           72.8
+#> # … with 133 more rows
+
+# build and fit a classification model stack
+class_st <-
+  stacks() %>%
+  add_candidates(class_res_nn) %>%
+  add_candidates(class_res_rf) %>%
+  blend_predictions() %>%
+  fit_members()
+#> ! Bootstrap05: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -61); ...
+#> ! Bootstrap09: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -73); ...
+#> ! Bootstrap11: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -65); ...
+#> ! Bootstrap18: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -79); ...
+#> ! Bootstrap19: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -77); ...
+#> ! Bootstrap23: preprocessor 1/1, model 1/1: from glmnet Fortran code (error code -82); ...
+#> Error in {    asNamespace("stacks")$fit_member(name = mem, wflows = model_stack[["model_defs"]],         members_map = members_map, train_dat = dat)} task 2 failed - "This engine requires some package installs: 'ranger'"
+ 
+class_st
+#> Error in eval(expr, envir, enclos) object 'class_st' not found
+
+# predict reflex, first as a class, then as
+# class probabilities
+predict(class_st, tree_frogs_class_test)
+#> Error in predict(class_st, tree_frogs_class_test) object 'class_st' not found
+predict(class_st, tree_frogs_class_test, type = "prob")
+#> Error in predict(class_st, tree_frogs_class_test, type = "prob") object 'class_st' not found
+
+# returning the member predictions as well
+predict(
+  class_st, 
+  tree_frogs_class_test, 
+  type = "prob", 
+  members = TRUE
+)
+#> Error in predict(class_st, tree_frogs_class_test, type = "prob", members = TRUE) object 'class_st' not found
+# }
+
+
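Once a stack is fitted, its test-set predictions can be scored with yardstick; a minimal sketch, assuming latency is the outcome column of tree_frogs_reg_test:

library(dplyr)
library(yardstick)

predict(reg_st, tree_frogs_reg_test) %>%
  bind_cols(tree_frogs_reg_test) %>%
  rmse(truth = latency, estimate = .pred)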

Arguments

@@ -215,6 +215,8 @@ Contents

diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html
index 4182110b..113363b6 100644
--- a/docs/reference/reexports.html
+++ b/docs/reference/reexports.html

Arguments

@@ -221,6 +221,8 @@ Contents

diff --git a/docs/reference/stacks.html b/docs/reference/stacks.html
index 3ae19ae3..6b8ed5e8 100644
--- a/docs/reference/stacks.html
+++ b/docs/reference/stacks.html
-stacks(...)
+stacks(...)

Arguments

@@ -224,6 +224,8 @@ Contents

diff --git a/docs/reference/stacks_description.html b/docs/reference/stacks_description.html
index 5258b5d0..31b73112 100644
--- a/docs/reference/stacks_description.html
+++ b/docs/reference/stacks_description.html