From c84b330a30c0b6b8b71ec5d2bffab05928a715d0 Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Fri, 29 Oct 2021 11:09:51 -0400
Subject: [PATCH 1/4] Add 'nrounds' as an alias for 'num_iterations'

---
 R-package/R/aliases.R                 |  1 +
 R-package/tests/testthat/test_basic.R | 54 +++++++++++++++++++++++++++
 docs/Parameters.rst                   |  2 +-
 include/LightGBM/config.h             |  2 +-
 python-package/lightgbm/basic.py      |  1 +
 src/io/config_auto.cpp                |  1 +
 6 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R
index 5fcba4f46fe7..7cd3245727af 100644
--- a/R-package/R/aliases.R
+++ b/R-package/R/aliases.R
@@ -113,6 +113,7 @@
             , "num_trees"
            , "num_round"
            , "num_rounds"
+           , "nrounds"
            , "num_boost_round"
            , "n_estimators"
            , "max_iter"
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 5aff8e2c2d2c..cc717063d253 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -188,6 +188,60 @@ test_that("lightgbm() rejects negative or 0 value passed to nrounds", {
     }
 })
 
+test_that("lightgbm() accepts nrounds as either a top-level argument or parameter", {
+    nrounds <- 15L
+
+    set.seed(708L)
+    top_level_bst <- lightgbm(
+        data = train$data
+        , label = train$label
+        , nrounds = nrounds
+        , params = list(
+            objective = "regression"
+            , metric = "l2"
+            , num_leaves = 5L
+        )
+        , save_name = tempfile(fileext = ".model")
+    )
+
+    set.seed(708L)
+    param_bst <- lightgbm(
+        data = train$data
+        , label = train$label
+        , params = list(
+            objective = "regression"
+            , metric = "l2"
+            , num_leaves = 5L
+            , nrounds = nrounds
+        )
+        , save_name = tempfile(fileext = ".model")
+    )
+
+    set.seed(708L)
+    both_customized <- lightgbm(
+        data = train$data
+        , label = train$label
+        , nrounds = 20L
+        , params = list(
+            objective = "regression"
+            , metric = "l2"
+            , num_leaves = 5L
+            , nrounds = nrounds
+        )
+        , save_name = tempfile(fileext = ".model")
+    )
+
+    expect_equal(param_bst$current_iter(), top_level_bst$current_iter())
+    expect_equal(param_bst$best_score
+                , top_level_bst$best_score
+                , tolerance = TOLERANCE)
+
+    expect_equal(param_bst$current_iter(), both_customized$current_iter())
+    expect_equal(param_bst$best_score
+                , both_customized$best_score
+                , tolerance = TOLERANCE)
+})
+
 test_that("lightgbm() performs evaluation on validation sets if they are provided", {
     set.seed(708L)
     dvalid1 <- lgb.Dataset(
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 5faa9af9fd31..8a37bbf90dc7 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -153,7 +153,7 @@ Core Parameters
 
   - **Note**: can be used only in CLI version
 
-- ``num_iterations`` :raw-html:`🔗︎`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, ``max_iter``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`🔗︎`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``nrounds``, ``num_boost_round``, ``n_estimators``, ``max_iter``, constraints: ``num_iterations >= 0``
 
   - number of boosting iterations
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 45fffa432819..e167f73284a6 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -161,7 +161,7 @@ struct Config {
   // desc = **Note**: can be used only in CLI version
   std::vector<std::string> valid;
 
-  // alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators, max_iter
+  // alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, nrounds, num_boost_round, n_estimators, max_iter
   // check = >=0
   // desc = number of boosting iterations
   // desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 83a4b5c071da..eb7d4a16b813 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -386,6 +386,7 @@ class _ConfigAliases:
                           "num_trees",
                          "num_round",
                          "num_rounds",
+                         "nrounds",
                          "num_boost_round",
                          "n_estimators",
                          "max_iter"},
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 4e3f000a88f5..cd24790b820c 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -33,6 +33,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
           {"num_trees", "num_iterations"},
          {"num_round", "num_iterations"},
          {"num_rounds", "num_iterations"},
+         {"nrounds", "num_iterations"},
          {"num_boost_round", "num_iterations"},
          {"n_estimators", "num_iterations"},
          {"max_iter", "num_iterations"},

From d47296c4a7c80dc2723a9816b4787ab9af130b96 Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Fri, 29 Oct 2021 17:06:02 -0400
Subject: [PATCH 2/4] Improve tests

---
 R-package/tests/testthat/test_basic.R | 94 ++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 9 deletions(-)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index cc717063d253..f2ae5c549462 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -230,16 +230,23 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
         )
         , save_name = tempfile(fileext = ".model")
     )
+    
+    top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
+    param_l2 <- param_bst$eval_train()[[1L]][["value"]]
+    both_l2 <- both_customized$eval_train()[[1L]][["value"]]
+
+    # check type just to be sure the subsetting didn't return a NULL
+    expect_true(is.numeric(top_level_l2))
+    expect_true(is.numeric(param_l2))
+    expect_true(is.numeric(both_l2))
+    
+    # check that model produces identical performance
+    expect_identical(top_level_l2, param_l2)
+    expect_identical(both_l2, param_l2)
+
+    expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
+    expect_identical(param_bst$current_iter(), both_customized$current_iter())
 
-    expect_equal(param_bst$current_iter(), top_level_bst$current_iter())
-    expect_equal(param_bst$best_score
-                , top_level_bst$best_score
-                , tolerance = TOLERANCE)
-
-    expect_equal(param_bst$current_iter(), both_customized$current_iter())
-    expect_equal(param_bst$best_score
-                , both_customized$best_score
-                , tolerance = TOLERANCE)
 })
@@ -521,6 +528,75 @@ test_that("lgb.train() rejects negative or 0 value passed to nrounds", {
     }
 })
 
+
+test_that("lgb.train() accepts nrounds as either a top-level argument or parameter", {
+    nrounds <- 15L
+    
+    set.seed(708L)
+    top_level_bst <- lgb.train(
+        data = lgb.Dataset(
+            train$data
+            , label = train$label
+        )
+        , nrounds = nrounds
+        , params = list(
+            objective = "regression"
+            , metric = "l2"
+            , num_leaves = 5L
+            , save_name = tempfile(fileext = ".model")
+        )
+    )
+    
+    set.seed(708L)
+    param_bst <- lgb.train(
+        data = lgb.Dataset(
+            train$data
+            , label = train$label
+        )
+        , params = list(
+            objective = "regression"
+            , metric = "l2"
+            , num_leaves = 5L
+            , nrounds = nrounds
+            , save_name = tempfile(fileext = ".model")
+        )
+    )
+    
+    set.seed(708L)
+    both_customized <- lgb.train(
+        data = lgb.Dataset(
+            train$data
+            , label = train$label
+        )
+        , nrounds = 20L
+        , params = list(
+            objective = "regression"
+            , metric = "l2"
+            , num_leaves = 5L
+            , nrounds = nrounds
+            , save_name = tempfile(fileext = ".model")
+        )
+    )
+    
+    top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
+    params_l2 <- param_bst$eval_train()[[1L]][["value"]]
+    both_l2 <- both_customized$eval_train()[[1L]][["value"]]
+    
+    # check type just to be sure the subsetting didn't return a NULL
+    expect_true(is.numeric(top_level_l2))
+    expect_true(is.numeric(params_l2))
+    expect_true(is.numeric(both_l2))
+    
+    # check that model produces identical performance
+    expect_identical(top_level_l2, params_l2)
+    expect_identical(both_l2, params_l2)
+    
+    expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
+    expect_identical(param_bst$current_iter(), both_customized$current_iter())
+    
+})
+
+
 test_that("lgb.train() throws an informative error if 'data' is not an lgb.Dataset", {
     bad_values <- list(
         4L

From 302871686da4d25f91144db75f397c0d482003f2 Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Fri, 29 Oct 2021 17:11:51 -0400
Subject: [PATCH 3/4] Compare against nrounds directly

---
 R-package/tests/testthat/test_basic.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index f2ae5c549462..62acb8eadaf6 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -232,20 +232,21 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
     )
 
     top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
-    param_l2 <- param_bst$eval_train()[[1L]][["value"]]
+    params_l2 <- param_bst$eval_train()[[1L]][["value"]]
     both_l2 <- both_customized$eval_train()[[1L]][["value"]]
 
     # check type just to be sure the subsetting didn't return a NULL
     expect_true(is.numeric(top_level_l2))
-    expect_true(is.numeric(param_l2))
+    expect_true(is.numeric(params_l2))
     expect_true(is.numeric(both_l2))
 
     # check that model produces identical performance
-    expect_identical(top_level_l2, param_l2)
-    expect_identical(both_l2, param_l2)
+    expect_identical(top_level_l2, params_l2)
+    expect_identical(both_l2, params_l2)
 
     expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
     expect_identical(param_bst$current_iter(), both_customized$current_iter())
+    expect_identical(param_bst$current_iter(), nrounds)
 
 })
@@ -593,6 +594,7 @@ test_that("lgb.train() accepts nrounds as either a top-level argument or paramet
 
     expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
     expect_identical(param_bst$current_iter(), both_customized$current_iter())
+    expect_identical(param_bst$current_iter(), nrounds)
 
 })

From 00704025b47988372cee11b10bef2fa0c1be650d Mon Sep 17 00:00:00 2001
From: Mike Mahoney
Date: Wed, 10 Nov 2021 08:26:29 -0500
Subject: [PATCH 4/4] Fix whitespace lints

---
 R-package/tests/testthat/test_basic.R | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 62acb8eadaf6..92b11650c3e2 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -230,7 +230,7 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
         )
         , save_name = tempfile(fileext = ".model")
     )
-    
+
     top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
     params_l2 <- param_bst$eval_train()[[1L]][["value"]]
     both_l2 <- both_customized$eval_train()[[1L]][["value"]]
@@ -239,7 +239,7 @@ test_that("lightgbm() accepts nrounds as either a top-level argument or paramete
     expect_true(is.numeric(top_level_l2))
     expect_true(is.numeric(params_l2))
     expect_true(is.numeric(both_l2))
-    
+
     # check that model produces identical performance
     expect_identical(top_level_l2, params_l2)
     expect_identical(both_l2, params_l2)
@@ -532,7 +532,7 @@ test_that("lgb.train() rejects negative or 0 value passed to nrounds", {
 
 test_that("lgb.train() accepts nrounds as either a top-level argument or parameter", {
     nrounds <- 15L
-    
+
     set.seed(708L)
     top_level_bst <- lgb.train(
         data = lgb.Dataset(
             train$data
             , label = train$label
         )
         , nrounds = nrounds
         , params = list(
             objective = "regression"
             , metric = "l2"
             , num_leaves = 5L
             , save_name = tempfile(fileext = ".model")
         )
     )
-    
+
     set.seed(708L)
     param_bst <- lgb.train(
         data = lgb.Dataset(
             train$data
             , label = train$label
         )
         , params = list(
             objective = "regression"
             , metric = "l2"
             , num_leaves = 5L
             , nrounds = nrounds
             , save_name = tempfile(fileext = ".model")
         )
     )
-    
+
     set.seed(708L)
     both_customized <- lgb.train(
         data = lgb.Dataset(
             train$data
             , label = train$label
         )
         , nrounds = 20L
         , params = list(
             objective = "regression"
             , metric = "l2"
             , num_leaves = 5L
             , nrounds = nrounds
             , save_name = tempfile(fileext = ".model")
         )
     )
-    
+
     top_level_l2 <- top_level_bst$eval_train()[[1L]][["value"]]
     params_l2 <- param_bst$eval_train()[[1L]][["value"]]
     both_l2 <- both_customized$eval_train()[[1L]][["value"]]
-    
+
     # check type just to be sure the subsetting didn't return a NULL
     expect_true(is.numeric(top_level_l2))
     expect_true(is.numeric(params_l2))
     expect_true(is.numeric(both_l2))
-    
+
     # check that model produces identical performance
     expect_identical(top_level_l2, params_l2)
     expect_identical(both_l2, params_l2)
-    
+
     expect_identical(param_bst$current_iter(), top_level_bst$current_iter())
     expect_identical(param_bst$current_iter(), both_customized$current_iter())
     expect_identical(param_bst$current_iter(), nrounds)
-    
+
 })
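
For reference, once this series is applied the new alias can be exercised from R as sketched below. This snippet is an illustration, not part of the patch: it assumes the 'agaricus.train' demo data that ships with the lightgbm R package, and it mirrors what the tests above assert, namely that an 'nrounds' entry inside 'params' is resolved through the alias table to 'num_iterations' and takes precedence over the top-level 'nrounds' argument.

# Usage sketch (not part of the patch series) for the new 'nrounds' alias.
# Assumes the agaricus.train demo data shipped with the lightgbm R package.
library(lightgbm)
data(agaricus.train, package = "lightgbm")
train <- agaricus.train

bst <- lightgbm(
    data = train$data
    , label = train$label
    , params = list(
        objective = "binary"
        , num_leaves = 5L
        # resolved to num_iterations through the alias tables added above
        , nrounds = 15L
    )
    , save_name = tempfile(fileext = ".model")
)

# per the tests in this series, the params entry wins over the top-level
# nrounds argument, so the booster stops after 15 iterations
print(bst$current_iter())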