[R-package] remove support for '...' in lgb.train() (#4863)
* [R-package] remove support for '...' in lgb.train()

* Apply suggestions from code review

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>

jameslamb and StrikerRUS authored Dec 7, 2021
1 parent 431556a commit 00f87c5
Showing 12 changed files with 155 additions and 166 deletions.
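For readers migrating existing code, here is a minimal sketch of the calling convention this commit enforces (illustrative only; the agaricus dataset and parameter values below are not part of the diff). Parameters that were previously accepted through '...' must now be supplied via the 'params' list:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)

    # Old style (deprecated since v3.3.0, removed by this commit):
    # extra parameters were forwarded through '...'
    # bst <- lgb.train(data = dtrain, nrounds = 10L, objective = "binary", num_leaves = 5L)

    # New style: all training parameters go into 'params'
    bst <- lgb.train(
        params = list(
            objective = "binary"
            , num_leaves = 5L
        )
        , data = dtrain
        , nrounds = 10L
    )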
27 changes: 1 addition & 26 deletions R-package/R/lgb.train.R
@@ -12,19 +12,6 @@
#' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
#' booster model into a predictor model which frees up memory and the
#' original datasets
#' @param ... other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
#' the "Parameters" section of the documentation} for more information. A few key parameters:
#' \itemize{
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfitting. Tree still grow by leaf-wise.}
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores(\code{parallel::detectCores(logical = FALSE)}),
#' not the number of threads (most CPU using hyper-threading to generate 2 threads
#' per CPU core).}
#' }
#' NOTE: As of v3.3.0, use of \code{...} is deprecated. Add parameters to \code{params} directly.
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained booster model \code{lgb.Booster}.
#'
@@ -67,8 +54,7 @@ lgb.train <- function(params = list(),
early_stopping_rounds = NULL,
callbacks = list(),
reset_data = FALSE,
serializable = TRUE,
...) {
serializable = TRUE) {

# validate inputs early to avoid unnecessary computation
if (nrounds <= 0L) {
@@ -88,23 +74,12 @@
}

# Setup temporary variables
additional_params <- list(...)
params <- append(params, additional_params)
params$verbose <- verbose
params <- lgb.check.obj(params = params, obj = obj)
params <- lgb.check.eval(params = params, eval = eval)
fobj <- NULL
eval_functions <- list(NULL)

if (length(additional_params) > 0L) {
warning(paste0(
"lgb.train: Found the following passed through '...': "
, paste(names(additional_params), collapse = ", ")
, ". These will be used, but in future releases of lightgbm, this warning will become an error. "
, "Add these to 'params' instead. See ?lgb.train for documentation on how to call this function."
))
}

# set some parameters, resolving the way they were passed in with other parameters
# in `params`.
# this ensures that the model stored with Booster$save() correctly represents
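The parameter guidance removed from the '...' documentation above (boosting, num_leaves, max_depth, num_threads) still applies; those settings are now simply placed in 'params'. A sketch, reusing the hypothetical dtrain object from the example above:

    bst <- lgb.train(
        params = list(
            objective = "binary"
            , num_leaves = 31L
            , max_depth = -1L
            # for best speed, use the number of physical CPU cores
            , num_threads = parallel::detectCores(logical = FALSE)
        )
        , data = dtrain
        , nrounds = 10L
    )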
8 changes: 0 additions & 8 deletions R-package/R/lightgbm.R
@@ -108,14 +108,6 @@ NULL
#' say "the first and tenth columns").}
#' \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
#' into a predictor model which frees up memory and the original datasets}
#' \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.}
#' \item{\code{num_leaves}: Maximum number of leaves in one tree.}
#' \item{\code{max_depth}: Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.}
#' \item{\code{num_threads}: Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores(\code{parallel::detectCores(logical = FALSE)}),
#' not the number of threads (most CPU using hyper-threading to generate 2 threads
#' per CPU core).}
#' }
#' @inheritSection lgb_shared_params Early Stopping
#' @return a trained \code{lgb.Booster}
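The lightgbm() convenience wrapper accepts the same 'params' list, as the test updates below illustrate. A hedged sketch, again assuming the agaricus data (values illustrative):

    bst <- lightgbm(
        data = agaricus.train$data
        , label = agaricus.train$label
        , params = list(
            objective = "binary"
            , num_leaves = 5L
        )
        , nrounds = 10L
        , save_name = tempfile(fileext = ".model")
    )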
17 changes: 1 addition & 16 deletions R-package/man/lgb.train.Rd

Some generated files are not rendered by default.

8 changes: 0 additions & 8 deletions R-package/man/lightgbm.Rd

Some generated files are not rendered by default.

16 changes: 11 additions & 5 deletions R-package/tests/testthat/test_Predictor.R
@@ -6,7 +6,9 @@ test_that("Predictor$finalize() should not fail", {
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, params = list(
objective = "regression"
)
, verbose = -1L
, nrounds = 3L
)
@@ -32,7 +34,9 @@ test_that("predictions do not fail for integer input", {
dtrain <- lgb.Dataset(X, label = y)
fit <- lgb.train(
data = dtrain
, objective = "regression"
, params = list(
objective = "regression"
)
, verbose = -1L
, nrounds = 3L
)
@@ -62,10 +66,12 @@ test_that("start_iteration works correctly", {
bst <- lightgbm(
data = as.matrix(train$data)
, label = train$label
, num_leaves = 4L
, learning_rate = 0.6
, params = list(
num_leaves = 4L
, learning_rate = 0.6
, objective = "binary"
)
, nrounds = 50L
, objective = "binary"
, valids = list("test" = dtest)
, early_stopping_rounds = 2L
)
77 changes: 42 additions & 35 deletions R-package/tests/testthat/test_basic.R
@@ -72,10 +72,12 @@ test_that("train and predict binary classification", {
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, params = list(
num_leaves = 5L
, objective = "binary"
, metric = "binary_error"
)
, nrounds = nrounds
, objective = "binary"
, metric = "binary_error"
, save_name = tempfile(fileext = ".model")
)
expect_false(is.null(bst$record_evals))
@@ -100,14 +102,16 @@ test_that("train and predict softmax", {
bst <- lightgbm(
data = as.matrix(iris[, -5L])
, label = lb
, num_leaves = 4L
, learning_rate = 0.05
, params = list(
num_leaves = 4L
, learning_rate = 0.05
, min_data = 20L
, min_hessian = 10.0
, objective = "multiclass"
, metric = "multi_error"
, num_class = 3L
)
, nrounds = 20L
, min_data = 20L
, min_hessian = 10.0
, objective = "multiclass"
, metric = "multi_error"
, num_class = 3L
, save_name = tempfile(fileext = ".model")
)

@@ -125,11 +129,13 @@ test_that("use of multiple eval metrics works", {
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 4L
, learning_rate = 1.0
, params = list(
num_leaves = 4L
, learning_rate = 1.0
, objective = "binary"
, metric = metrics
)
, nrounds = 10L
, objective = "binary"
, metric = metrics
, save_name = tempfile(fileext = ".model")
)
expect_false(is.null(bst$record_evals))
@@ -147,10 +153,12 @@ test_that("lgb.Booster.upper_bound() and lgb.Booster.lower_bound() work as expec
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, params = list(
num_leaves = 5L
, objective = "binary"
, metric = "binary_error"
)
, nrounds = nrounds
, objective = "binary"
, metric = "binary_error"
, save_name = tempfile(fileext = ".model")
)
expect_true(abs(bst$lower_bound() - -1.590853) < TOLERANCE)
@@ -163,10 +171,12 @@
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, params = list(
num_leaves = 5L
, objective = "regression"
, metric = "l2"
)
, nrounds = nrounds
, objective = "regression"
, metric = "l2"
, save_name = tempfile(fileext = ".model")
)
expect_true(abs(bst$lower_bound() - 0.1513859) < TOLERANCE)
@@ -264,13 +274,15 @@ test_that("lightgbm() performs evaluation on validation sets if they are provide
bst <- lightgbm(
data = train$data
, label = train$label
, num_leaves = 5L
, nrounds = nrounds
, objective = "binary"
, metric = c(
"binary_error"
, "auc"
, params = list(
num_leaves = 5L
, objective = "binary"
, metric = c(
"binary_error"
, "auc"
)
)
, nrounds = nrounds
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
@@ -521,11 +533,11 @@ test_that("lgb.train() works as expected with multiple eval metrics", {
train$data
, label = train$label
)
, learning_rate = 1.0
, nrounds = 10L
, params = list(
objective = "binary"
, metric = metrics
, learning_rate = 1.0
)
, valids = list(
"train" = lgb.Dataset(
@@ -1499,13 +1511,13 @@ test_that("when early stopping is not activated, best_iter and best_score come f
objective = "regression"
, metric = "rmse"
, learning_rate = 1.5
, num_leaves = 5L
)

# example 1: two valids, neither are the training data
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
@@ -1527,7 +1539,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"train" = dtrain
, "valid1" = dvalid1
@@ -1550,7 +1561,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "train" = dtrain
@@ -1574,7 +1584,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "valid2" = dvalid2
@@ -1598,7 +1607,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
@@ -1623,7 +1631,6 @@ test_that("when early stopping is not activated, best_iter and best_score come f
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"train" = dtrain
)
@@ -1655,7 +1662,6 @@ test_that("lightgbm.train() gives the correct best_score and best_iter for a met
bst <- lgb.train(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
@@ -1664,6 +1670,7 @@
objective = "binary"
, metric = "auc"
, learning_rate = 1.5
, num_leaves = 5L
)
)
# note that "something-random-we-would-not-hardcode" was recognized as the training
@@ -1708,7 +1715,6 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com
bst <- lightgbm(
data = dtrain
, nrounds = nrounds
, num_leaves = 5L
, valids = list(
"valid1" = dvalid1
, "something-random-we-would-not-hardcode" = dtrain
@@ -1718,6 +1724,7 @@
objective = "binary"
, metric = "auc"
, learning_rate = 1.5
, num_leaves = 5L
)
, verbose = -7L
, save_name = tempfile(fileext = ".model")