From 135d38cb019c5b034c2a8eccfb611db250145447 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 18 Jun 2021 00:43:29 +0300 Subject: [PATCH] fix param aliases --- R-package/R/aliases.R | 7 ++++- docs/Parameters.rst | 44 ++++++++++++++++---------------- include/LightGBM/config.h | 26 +++++++++---------- python-package/lightgbm/basic.py | 9 ++++++- src/c_api.cpp | 6 ++++- src/io/config_auto.cpp | 10 ++++---- 6 files changed, 59 insertions(+), 43 deletions(-) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index cb2288fedb15..fd69ae51febf 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -4,7 +4,7 @@ # [description] List of respected parameter aliases specific to lgb.Dataset. Wrapped in a function to # take advantage of lazy evaluation (so it doesn't matter what order # R sources files during installation). -# [return] A named list, where each key is a parameter relevant to lgb.DataSet and each value is a character +# [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character # vector of corresponding aliases. .DATASET_PARAMETERS <- function() { return( @@ -57,6 +57,10 @@ "label_column" , "label" ) + , "linear_tree" = c( + "linear_tree" + , "linear_trees" + ) , "max_bin" = "max_bin" , "max_bin_by_feature" = "max_bin_by_feature" , "min_data_in_bin" = "min_data_in_bin" @@ -64,6 +68,7 @@ "pre_partition" , "is_pre_partition" ) + , "precise_float_parser" = "precise_float_parser" , "two_round" = c( "two_round" , "two_round_loading" diff --git a/docs/Parameters.rst b/docs/Parameters.rst index a64693141889..c54bf96a6f8e 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -139,28 +139,6 @@ Core Parameters - **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations -- ``linear_tree`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``linear_trees`` - - - fit piecewise linear gradient boosting tree - - - tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant - - - the linear model at each leaf includes all the numerical features in that leaf's branch - - - categorical features are used for splits as normal but are not used in the linear models - - - missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R - - - it is recommended to rescale data before training so that features have similar mean and standard deviation - - - **Note**: only works with CPU and ``serial`` tree learner - - - **Note**: ``regression_l1`` objective is not supported with linear tree boosting - - - **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM - - - **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves - - ``data`` :raw-html:`🔗︎`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename`` - path of training data, LightGBM will train from this data @@ -672,6 +650,28 @@ IO Parameters Dataset Parameters ~~~~~~~~~~~~~~~~~~ +- ``linear_tree`` :raw-html:`🔗︎`, default = ``false``, type = bool, aliases: ``linear_trees`` + + - fit piecewise linear gradient boosting tree + + - tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant + + - the linear model at each leaf includes all the numerical features in that leaf's branch + + - categorical features are used for splits as normal but are not used in the linear models + + - missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R + + - it is recommended to rescale data before training so that features have similar mean and standard deviation + + - **Note**: only works with CPU and ``serial`` tree learner + + - **Note**: ``regression_l1`` objective is not supported with linear tree boosting + + - **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM + + - **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves + - ``max_bin`` :raw-html:`🔗︎`, default = ``255``, type = int, constraints: ``max_bin > 1`` - max number of bins that feature values will be bucketed in diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 33649e1fc2cd..32352691a268 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -149,19 +149,6 @@ struct Config { // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations std::string boosting = "gbdt"; - // alias = linear_trees - // desc = fit piecewise linear gradient boosting tree - // descl2 = tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant - // descl2 = the linear model at each leaf includes all the numerical features in that leaf's branch - // descl2 = categorical features are used for splits as normal but are not used in the linear models - // descl2 = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R - // descl2 = it is recommended to rescale data before training so that features have similar mean and standard deviation - // descl2 = **Note**: only works with CPU and ``serial`` tree learner - // descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting - // descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM - // descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves - bool linear_tree = false; - // alias = train, train_data, train_data_file, data_filename // desc = path of training data, LightGBM will train from this data // desc = **Note**: can be used only in CLI version @@ -586,6 +573,19 @@ struct Config { #pragma region Dataset Parameters + // alias = linear_trees + // desc = fit piecewise linear gradient boosting tree + // descl2 = tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant + // descl2 = the linear model at each leaf includes all the numerical features in that leaf's branch + // descl2 = categorical features are used for splits as normal but are not used in the linear models + // descl2 = missing values should not be encoded as ``0``. Use ``np.nan`` for Python, ``NA`` for the CLI, and ``NA``, ``NA_real_``, or ``NA_integer_`` for R + // descl2 = it is recommended to rescale data before training so that features have similar mean and standard deviation + // descl2 = **Note**: only works with CPU and ``serial`` tree learner + // descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting + // descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM + // descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves + bool linear_tree = false; + // check = >1 // desc = max number of bins that feature values will be bucketed in // desc = small number of bins may reduce training accuracy but may increase general power (deal with over-fitting) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 121b371459d1..2c06661c841f 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -311,6 +311,8 @@ class _ConfigAliases: "sparse"}, "label_column": {"label_column", "label"}, + "linear_tree": {"linear_tree", + "linear_trees"}, "local_listen_port": {"local_listen_port", "local_port", "port"}, @@ -1144,6 +1146,7 @@ def get_params(self): "max_bin_by_feature", "min_data_in_bin", "pre_partition", + "precise_float_parser", "two_round", "use_missing", "weight_column", @@ -3180,7 +3183,11 @@ def refit(self, data, label, decay_rate=0.9, **kwargs): _safe_call(_LIB.LGBM_BoosterGetLinear( self.handle, ctypes.byref(out_is_linear))) - new_params = deepcopy(self.params) + new_params = _choose_param_value( + main_param_name="linear_tree", + params=self.params, + default_value=None + ) new_params["linear_tree"] = out_is_linear.value train_set = Dataset(data, label, silent=True, params=new_params) new_params['refit_decay_rate'] = decay_rate diff --git a/src/c_api.cpp b/src/c_api.cpp index 3d20d92da70d..67ddda43cd06 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -287,9 +287,13 @@ class Booster { "You need to set `feature_pre_filter=false` to dynamically change " "the `min_data_in_leaf`."); } - if (new_param.count("linear_tree") && (new_config.linear_tree != old_config.linear_tree)) { + if (new_param.count("linear_tree") && new_config.linear_tree != old_config.linear_tree) { Log::Fatal("Cannot change linear_tree after constructed Dataset handle."); } + if (new_param.count("precise_float_parser") && + new_config.precise_float_parser != old_config.precise_float_parser) { + Log::Fatal("Cannot change precise_float_parser after constructed Dataset handle."); + } } void ResetConfig(const char* parameters) { diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 92257f9636aa..d3a9432f482c 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -16,7 +16,6 @@ const std::unordered_map& Config::alias_table() { {"application", "objective"}, {"boosting_type", "boosting"}, {"boost", "boosting"}, - {"linear_trees", "linear_tree"}, {"train", "data"}, {"train_data", "data"}, {"train_data_file", "data"}, @@ -106,6 +105,7 @@ const std::unordered_map& Config::alias_table() { {"model_output", "output_model"}, {"model_out", "output_model"}, {"save_period", "snapshot_freq"}, + {"linear_trees", "linear_tree"}, {"subsample_for_bin", "bin_construct_sample_cnt"}, {"data_seed", "data_random_seed"}, {"is_sparse", "is_enable_sparse"}, @@ -176,7 +176,6 @@ const std::unordered_set& Config::parameter_set() { "task", "objective", "boosting", - "linear_tree", "data", "valid", "num_iterations", @@ -241,6 +240,7 @@ const std::unordered_set& Config::parameter_set() { "output_model", "saved_feature_importance_type", "snapshot_freq", + "linear_tree", "max_bin", "max_bin_by_feature", "min_data_in_bin", @@ -309,8 +309,6 @@ const std::unordered_set& Config::parameter_set() { void Config::GetMembersFromString(const std::unordered_map& params) { std::string tmp_str = ""; - GetBool(params, "linear_tree", &linear_tree); - GetString(params, "data", &data); if (GetString(params, "valid", &tmp_str)) { @@ -483,6 +481,8 @@ void Config::GetMembersFromString(const std::unordered_map