diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index 7cd3245727af..77fe74ef2af0 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -7,85 +7,30 @@ # [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character # vector of corresponding aliases. .DATASET_PARAMETERS <- function() { - return( - list( - "bin_construct_sample_cnt" = c( - "bin_construct_sample_cnt" - , "subsample_for_bin" - ) - , "categorical_feature" = c( - "categorical_feature" - , "cat_feature" - , "categorical_column" - , "cat_column" - , "categorical_features" - ) - , "data_random_seed" = c( - "data_random_seed" - , "data_seed" - ) - , "enable_bundle" = c( - "enable_bundle" - , "is_enable_bundle" - , "bundle" - ) - , "feature_pre_filter" = "feature_pre_filter" - , "forcedbins_filename" = "forcedbins_filename" - , "group_column" = c( - "group_column" - , "group" - , "group_id" - , "query_column" - , "query" - , "query_id" - ) - , "header" = c( - "header" - , "has_header" - ) - , "ignore_column" = c( - "ignore_column" - , "ignore_feature" - , "blacklist" - ) - , "is_enable_sparse" = c( - "is_enable_sparse" - , "is_sparse" - , "enable_sparse" - , "sparse" - ) - , "label_column" = c( - "label_column" - , "label" - ) - , "linear_tree" = c( - "linear_tree" - , "linear_trees" - ) - , "max_bin" = c( - "max_bin" - , "max_bins" - ) - , "max_bin_by_feature" = "max_bin_by_feature" - , "min_data_in_bin" = "min_data_in_bin" - , "pre_partition" = c( - "pre_partition" - , "is_pre_partition" - ) - , "precise_float_parser" = "precise_float_parser" - , "two_round" = c( - "two_round" - , "two_round_loading" - , "use_two_round_loading" - ) - , "use_missing" = "use_missing" - , "weight_column" = c( - "weight_column" - , "weight" - ) - , "zero_as_missing" = "zero_as_missing" - ) - ) + all_aliases <- .PARAMETER_ALIASES() + return(all_aliases[c( + "bin_construct_sample_cnt" + , "categorical_feature" + , "data_random_seed" + , "enable_bundle" + , 
"feature_pre_filter" + , "forcedbins_filename" + , "group_column" + , "header" + , "ignore_column" + , "is_enable_sparse" + , "label_column" + , "linear_tree" + , "max_bin" + , "max_bin_by_feature" + , "min_data_in_bin" + , "pre_partition" + , "precise_float_parser" + , "two_round" + , "use_missing" + , "weight_column" + , "zero_as_missing" + )]) } # [description] List of respected parameter aliases. Wrapped in a function to take advantage of @@ -93,33 +38,16 @@ # [return] A named list, where each key is a main LightGBM parameter and each value is a character # vector of corresponding aliases. .PARAMETER_ALIASES <- function() { - learning_params <- list( - "boosting" = c( - "boosting" - , "boost" - , "boosting_type" - ) - , "early_stopping_round" = c( - "early_stopping_round" - , "early_stopping_rounds" - , "early_stopping" - , "n_iter_no_change" - ) - , "num_iterations" = c( - "num_iterations" - , "num_iteration" - , "n_iter" - , "num_tree" - , "num_trees" - , "num_round" - , "num_rounds" - , "nrounds" - , "num_boost_round" - , "n_estimators" - , "max_iter" + params_to_aliases <- jsonlite::fromJSON( + .Call( + LGBM_DumpParamAliases_R ) ) - return(c(learning_params, .DATASET_PARAMETERS())) + for (main_name in names(params_to_aliases)) { + aliases_with_main_name <- c(main_name, unlist(params_to_aliases[[main_name]])) + params_to_aliases[[main_name]] <- aliases_with_main_name + } + return(params_to_aliases) } # [description] diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 2150e652841f..3a3582b12cf1 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -872,6 +872,26 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle, R_API_END(); } +SEXP LGBM_DumpParamAliases_R() { + SEXP cont_token = PROTECT(R_MakeUnwindCont()); + R_API_BEGIN(); + SEXP aliases_str; + int64_t out_len = 0; + int64_t buf_len = 1024 * 1024; + std::vector<char> inner_char_buf(buf_len); + CHECK_CALL(LGBM_DumpParamAliases(buf_len, &out_len, 
inner_char_buf.data())); + // if aliases string was larger than the initial buffer, allocate a bigger buffer and try again + if (out_len > buf_len) { + inner_char_buf.resize(out_len); + CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data())); + } + aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token)); + SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token)); + UNPROTECT(2); + return aliases_str; + R_API_END(); +} + // .Call() calls static const R_CallMethodDef CallEntries[] = { {"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1}, @@ -916,6 +936,7 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, + {"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 41e2fbab13db..40032d7b44cb 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -596,4 +596,10 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R( SEXP feature_importance_type ); +/*! 
+* \brief Dump parameter aliases to JSON +* \return R character vector (length=1) with aliases JSON +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R(); + #endif // LIGHTGBM_R_H_ diff --git a/R-package/tests/testthat/test_parameters.R b/R-package/tests/testthat/test_parameters.R index 16d1e4a5a5e2..0e790e29bace 100644 --- a/R-package/tests/testthat/test_parameters.R +++ b/R-package/tests/testthat/test_parameters.R @@ -50,6 +50,7 @@ context("parameter aliases") test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", { param_aliases <- .PARAMETER_ALIASES() expect_identical(class(param_aliases), "list") + expect_true(length(param_aliases) > 100L) expect_true(is.character(names(param_aliases))) expect_true(is.character(param_aliases[["boosting"]])) expect_true(is.character(param_aliases[["early_stopping_round"]])) @@ -58,6 +59,7 @@ test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where expect_true(length(names(param_aliases)) == length(param_aliases)) expect_true(all(sapply(param_aliases, is.character))) expect_true(length(unique(names(param_aliases))) == length(param_aliases)) + expect_equal(sort(param_aliases[["task"]]), c("task", "task_type")) }) test_that("training should warn if you use 'dart' boosting, specified with 'boosting' or aliases", { diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py index ccb6675f37af..bcc6d34bfa9a 100644 --- a/helpers/parameter_generator.py +++ b/helpers/parameter_generator.py @@ -6,6 +6,7 @@ along with parameters description in LightGBM/docs/Parameters.rst file from the information in LightGBM/include/LightGBM/config.h file. 
""" +from collections import defaultdict from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -291,6 +292,7 @@ def gen_parameter_code( keys, infos = get_parameter_infos(config_hpp) names = get_names(infos) alias = get_alias(infos) + names_with_aliases = defaultdict(list) str_to_write = r"""/*! * Copyright (c) 2018 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. @@ -306,6 +308,7 @@ def gen_parameter_code( for pair in alias: str_to_write += f' {{"{pair[0]}", "{pair[1]}"}},\n' + names_with_aliases[pair[1]].append(pair[0]) str_to_write += " });\n" str_to_write += " return aliases;\n" str_to_write += "}\n\n" @@ -353,6 +356,21 @@ def gen_parameter_code( # tails str_to_write += " return str_buf.str();\n" str_to_write += "}\n\n" + + str_to_write += "const std::string Config::DumpAliases() {\n" + str_to_write += " std::stringstream str_buf;\n" + str_to_write += ' str_buf << "{";\n' + for idx, name in enumerate(names): + if idx > 0: + str_to_write += ', ";\n' + aliases = '\\", \\"'.join([alias for alias in names_with_aliases[name]]) + aliases = f'[\\"{aliases}\\"]' if aliases else '[]' + str_to_write += f' str_buf << "\\"{name}\\": {aliases}' + str_to_write += '";\n' + str_to_write += ' str_buf << "}";\n' + str_to_write += " return str_buf.str();\n" + str_to_write += "}\n\n" + str_to_write += "} // namespace LightGBM\n" with open(config_out_cpp, "w") as config_out_cpp_file: config_out_cpp_file.write(str_to_write) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index debe163359ae..e0df38eeb82e 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -52,6 +52,17 @@ typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */ */ LIGHTGBM_C_EXPORT const char* LGBM_GetLastError(); +/*! + * \brief Dump all parameter names with their aliases to JSON. 
+ * \param buffer_len String buffer length, if ``buffer_len < out_len``, you should re-allocate buffer + * \param[out] out_len Actual output length + * \param[out] out_str JSON format string of parameters, should pre-allocate memory + * \return 0 when succeed, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len, + int64_t* out_len, + char* out_str); + /*! * \brief Register a callback function for log redirecting. * \param callback The callback function to register diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index a4352f78b096..c045970a8f1f 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -1041,6 +1041,7 @@ struct Config { static const std::unordered_set<std::string>& parameter_set(); std::vector<std::vector<double>> auc_mu_weights_matrix; std::vector<std::vector<int>> interaction_constraints_vector; + static const std::string DumpAliases(); private: void CheckParamConflict(); diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 8227bcf61ed1..1b0824bc692c 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -325,106 +325,47 @@ class LGBMDeprecationWarning(UserWarning): class _ConfigAliases: - aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt", - "subsample_for_bin"}, - "boosting": {"boosting", - "boosting_type", - "boost"}, - "categorical_feature": {"categorical_feature", - "cat_feature", - "categorical_column", - "cat_column", - "categorical_features"}, - "data_random_seed": {"data_random_seed", - "data_seed"}, - "early_stopping_round": {"early_stopping_round", - "early_stopping_rounds", - "early_stopping", - "n_iter_no_change"}, - "enable_bundle": {"enable_bundle", - "is_enable_bundle", - "bundle"}, - "eval_at": {"eval_at", - "ndcg_eval_at", - "ndcg_at", - "map_eval_at", - "map_at"}, - "group_column": {"group_column", - "group", - "group_id", - "query_column", - "query", - "query_id"}, - "header": {"header", - "has_header"}, - 
"ignore_column": {"ignore_column", - "ignore_feature", - "blacklist"}, - "is_enable_sparse": {"is_enable_sparse", - "is_sparse", - "enable_sparse", - "sparse"}, - "label_column": {"label_column", - "label"}, - "linear_tree": {"linear_tree", - "linear_trees"}, - "local_listen_port": {"local_listen_port", - "local_port", - "port"}, - "machines": {"machines", - "workers", - "nodes"}, - "max_bin": {"max_bin", - "max_bins"}, - "metric": {"metric", - "metrics", - "metric_types"}, - "num_class": {"num_class", - "num_classes"}, - "num_iterations": {"num_iterations", - "num_iteration", - "n_iter", - "num_tree", - "num_trees", - "num_round", - "num_rounds", - "nrounds", - "num_boost_round", - "n_estimators", - "max_iter"}, - "num_machines": {"num_machines", - "num_machine"}, - "num_threads": {"num_threads", - "num_thread", - "nthread", - "nthreads", - "n_jobs"}, - "objective": {"objective", - "objective_type", - "app", - "application", - "loss"}, - "pre_partition": {"pre_partition", - "is_pre_partition"}, - "tree_learner": {"tree_learner", - "tree", - "tree_type", - "tree_learner_type"}, - "two_round": {"two_round", - "two_round_loading", - "use_two_round_loading"}, - "weight_column": {"weight_column", - "weight"}} + # lazy evaluation to allow import without dynamic library, e.g., for docs generation + aliases = None + + @staticmethod + def _get_all_param_aliases() -> Dict[str, Set[str]]: + buffer_len = 1 << 20 + tmp_out_len = ctypes.c_int64(0) + string_buffer = ctypes.create_string_buffer(buffer_len) + ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) + _safe_call(_LIB.LGBM_DumpParamAliases( + ctypes.c_int64(buffer_len), + ctypes.byref(tmp_out_len), + ptr_string_buffer)) + actual_len = tmp_out_len.value + # if buffer length is not long enough, re-allocate a buffer + if actual_len > buffer_len: + string_buffer = ctypes.create_string_buffer(actual_len) + ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) + 
_safe_call(_LIB.LGBM_DumpParamAliases( + ctypes.c_int64(actual_len), + ctypes.byref(tmp_out_len), + ptr_string_buffer)) + aliases = json.loads( + string_buffer.value.decode('utf-8'), + object_hook=lambda obj: {k: set(v) | {k} for k, v in obj.items()} + ) + return aliases @classmethod - def get(cls, *args): + def get(cls, *args) -> Set[str]: + if cls.aliases is None: + cls.aliases = cls._get_all_param_aliases() ret = set() for i in args: ret |= cls.aliases.get(i, {i}) return ret @classmethod - def get_by_alias(cls, *args): + def get_by_alias(cls, *args) -> Set[str]: + if cls.aliases is None: + cls.aliases = cls._get_all_param_aliases() ret = set(args) for arg in args: for aliases in cls.aliases.values(): diff --git a/src/c_api.cpp b/src/c_api.cpp index 9a471815b799..d8a8deaf57b0 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -888,6 +888,18 @@ const char* LGBM_GetLastError() { return LastErrorMsg(); } +int LGBM_DumpParamAliases(int64_t buffer_len, + int64_t* out_len, + char* out_str) { + API_BEGIN(); + std::string aliases = Config::DumpAliases(); + *out_len = static_cast<int64_t>(aliases.size()) + 1; + if (*out_len <= buffer_len) { + std::memcpy(out_str, aliases.c_str(), *out_len); + } + API_END(); +} + int LGBM_RegisterLogCallback(void (*callback)(const char*)) { API_BEGIN(); Log::ResetCallBack(callback); diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 682264358893..9f3dd7a188f1 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -756,4 +756,143 @@ std::string Config::SaveMembersToString() const { return str_buf.str(); } +const std::string Config::DumpAliases() { + std::stringstream str_buf; + str_buf << "{"; + str_buf << "\"config\": [\"config_file\"], "; + str_buf << "\"task\": [\"task_type\"], "; + str_buf << "\"objective\": [\"objective_type\", \"app\", \"application\", \"loss\"], "; + str_buf << "\"boosting\": [\"boosting_type\", \"boost\"], "; + str_buf << "\"data\": [\"train\", \"train_data\", \"train_data_file\", 
\"data_filename\"], "; + str_buf << "\"valid\": [\"test\", \"valid_data\", \"valid_data_file\", \"test_data\", \"test_data_file\", \"valid_filenames\"], "; + str_buf << "\"num_iterations\": [\"num_iteration\", \"n_iter\", \"num_tree\", \"num_trees\", \"num_round\", \"num_rounds\", \"nrounds\", \"num_boost_round\", \"n_estimators\", \"max_iter\"], "; + str_buf << "\"learning_rate\": [\"shrinkage_rate\", \"eta\"], "; + str_buf << "\"num_leaves\": [\"num_leaf\", \"max_leaves\", \"max_leaf\", \"max_leaf_nodes\"], "; + str_buf << "\"tree_learner\": [\"tree\", \"tree_type\", \"tree_learner_type\"], "; + str_buf << "\"num_threads\": [\"num_thread\", \"nthread\", \"nthreads\", \"n_jobs\"], "; + str_buf << "\"device_type\": [\"device\"], "; + str_buf << "\"seed\": [\"random_seed\", \"random_state\"], "; + str_buf << "\"deterministic\": [], "; + str_buf << "\"force_col_wise\": [], "; + str_buf << "\"force_row_wise\": [], "; + str_buf << "\"histogram_pool_size\": [\"hist_pool_size\"], "; + str_buf << "\"max_depth\": [], "; + str_buf << "\"min_data_in_leaf\": [\"min_data_per_leaf\", \"min_data\", \"min_child_samples\", \"min_samples_leaf\"], "; + str_buf << "\"min_sum_hessian_in_leaf\": [\"min_sum_hessian_per_leaf\", \"min_sum_hessian\", \"min_hessian\", \"min_child_weight\"], "; + str_buf << "\"bagging_fraction\": [\"sub_row\", \"subsample\", \"bagging\"], "; + str_buf << "\"pos_bagging_fraction\": [\"pos_sub_row\", \"pos_subsample\", \"pos_bagging\"], "; + str_buf << "\"neg_bagging_fraction\": [\"neg_sub_row\", \"neg_subsample\", \"neg_bagging\"], "; + str_buf << "\"bagging_freq\": [\"subsample_freq\"], "; + str_buf << "\"bagging_seed\": [\"bagging_fraction_seed\"], "; + str_buf << "\"feature_fraction\": [\"sub_feature\", \"colsample_bytree\"], "; + str_buf << "\"feature_fraction_bynode\": [\"sub_feature_bynode\", \"colsample_bynode\"], "; + str_buf << "\"feature_fraction_seed\": [], "; + str_buf << "\"extra_trees\": [\"extra_tree\"], "; + str_buf << "\"extra_seed\": [], "; 
+ str_buf << "\"early_stopping_round\": [\"early_stopping_rounds\", \"early_stopping\", \"n_iter_no_change\"], "; + str_buf << "\"first_metric_only\": [], "; + str_buf << "\"max_delta_step\": [\"max_tree_output\", \"max_leaf_output\"], "; + str_buf << "\"lambda_l1\": [\"reg_alpha\", \"l1_regularization\"], "; + str_buf << "\"lambda_l2\": [\"reg_lambda\", \"lambda\", \"l2_regularization\"], "; + str_buf << "\"linear_lambda\": [], "; + str_buf << "\"min_gain_to_split\": [\"min_split_gain\"], "; + str_buf << "\"drop_rate\": [\"rate_drop\"], "; + str_buf << "\"max_drop\": [], "; + str_buf << "\"skip_drop\": [], "; + str_buf << "\"xgboost_dart_mode\": [], "; + str_buf << "\"uniform_drop\": [], "; + str_buf << "\"drop_seed\": [], "; + str_buf << "\"top_rate\": [], "; + str_buf << "\"other_rate\": [], "; + str_buf << "\"min_data_per_group\": [], "; + str_buf << "\"max_cat_threshold\": [], "; + str_buf << "\"cat_l2\": [], "; + str_buf << "\"cat_smooth\": [], "; + str_buf << "\"max_cat_to_onehot\": [], "; + str_buf << "\"top_k\": [\"topk\"], "; + str_buf << "\"monotone_constraints\": [\"mc\", \"monotone_constraint\", \"monotonic_cst\"], "; + str_buf << "\"monotone_constraints_method\": [\"monotone_constraining_method\", \"mc_method\"], "; + str_buf << "\"monotone_penalty\": [\"monotone_splits_penalty\", \"ms_penalty\", \"mc_penalty\"], "; + str_buf << "\"feature_contri\": [\"feature_contrib\", \"fc\", \"fp\", \"feature_penalty\"], "; + str_buf << "\"forcedsplits_filename\": [\"fs\", \"forced_splits_filename\", \"forced_splits_file\", \"forced_splits\"], "; + str_buf << "\"refit_decay_rate\": [], "; + str_buf << "\"cegb_tradeoff\": [], "; + str_buf << "\"cegb_penalty_split\": [], "; + str_buf << "\"cegb_penalty_feature_lazy\": [], "; + str_buf << "\"cegb_penalty_feature_coupled\": [], "; + str_buf << "\"path_smooth\": [], "; + str_buf << "\"interaction_constraints\": [], "; + str_buf << "\"verbosity\": [\"verbose\"], "; + str_buf << "\"input_model\": [\"model_input\", 
\"model_in\"], "; + str_buf << "\"output_model\": [\"model_output\", \"model_out\"], "; + str_buf << "\"saved_feature_importance_type\": [], "; + str_buf << "\"snapshot_freq\": [\"save_period\"], "; + str_buf << "\"linear_tree\": [\"linear_trees\"], "; + str_buf << "\"max_bin\": [\"max_bins\"], "; + str_buf << "\"max_bin_by_feature\": [], "; + str_buf << "\"min_data_in_bin\": [], "; + str_buf << "\"bin_construct_sample_cnt\": [\"subsample_for_bin\"], "; + str_buf << "\"data_random_seed\": [\"data_seed\"], "; + str_buf << "\"is_enable_sparse\": [\"is_sparse\", \"enable_sparse\", \"sparse\"], "; + str_buf << "\"enable_bundle\": [\"is_enable_bundle\", \"bundle\"], "; + str_buf << "\"use_missing\": [], "; + str_buf << "\"zero_as_missing\": [], "; + str_buf << "\"feature_pre_filter\": [], "; + str_buf << "\"pre_partition\": [\"is_pre_partition\"], "; + str_buf << "\"two_round\": [\"two_round_loading\", \"use_two_round_loading\"], "; + str_buf << "\"header\": [\"has_header\"], "; + str_buf << "\"label_column\": [\"label\"], "; + str_buf << "\"weight_column\": [\"weight\"], "; + str_buf << "\"group_column\": [\"group\", \"group_id\", \"query_column\", \"query\", \"query_id\"], "; + str_buf << "\"ignore_column\": [\"ignore_feature\", \"blacklist\"], "; + str_buf << "\"categorical_feature\": [\"cat_feature\", \"categorical_column\", \"cat_column\", \"categorical_features\"], "; + str_buf << "\"forcedbins_filename\": [], "; + str_buf << "\"save_binary\": [\"is_save_binary\", \"is_save_binary_file\"], "; + str_buf << "\"precise_float_parser\": [], "; + str_buf << "\"parser_config_file\": [], "; + str_buf << "\"start_iteration_predict\": [], "; + str_buf << "\"num_iteration_predict\": [], "; + str_buf << "\"predict_raw_score\": [\"is_predict_raw_score\", \"predict_rawscore\", \"raw_score\"], "; + str_buf << "\"predict_leaf_index\": [\"is_predict_leaf_index\", \"leaf_index\"], "; + str_buf << "\"predict_contrib\": [\"is_predict_contrib\", \"contrib\"], "; + str_buf << 
"\"predict_disable_shape_check\": [], "; + str_buf << "\"pred_early_stop\": [], "; + str_buf << "\"pred_early_stop_freq\": [], "; + str_buf << "\"pred_early_stop_margin\": [], "; + str_buf << "\"output_result\": [\"predict_result\", \"prediction_result\", \"predict_name\", \"prediction_name\", \"pred_name\", \"name_pred\"], "; + str_buf << "\"convert_model_language\": [], "; + str_buf << "\"convert_model\": [\"convert_model_file\"], "; + str_buf << "\"objective_seed\": [], "; + str_buf << "\"num_class\": [\"num_classes\"], "; + str_buf << "\"is_unbalance\": [\"unbalance\", \"unbalanced_sets\"], "; + str_buf << "\"scale_pos_weight\": [], "; + str_buf << "\"sigmoid\": [], "; + str_buf << "\"boost_from_average\": [], "; + str_buf << "\"reg_sqrt\": [], "; + str_buf << "\"alpha\": [], "; + str_buf << "\"fair_c\": [], "; + str_buf << "\"poisson_max_delta_step\": [], "; + str_buf << "\"tweedie_variance_power\": [], "; + str_buf << "\"lambdarank_truncation_level\": [], "; + str_buf << "\"lambdarank_norm\": [], "; + str_buf << "\"label_gain\": [], "; + str_buf << "\"metric\": [\"metrics\", \"metric_types\"], "; + str_buf << "\"metric_freq\": [\"output_freq\"], "; + str_buf << "\"is_provide_training_metric\": [\"training_metric\", \"is_training_metric\", \"train_metric\"], "; + str_buf << "\"eval_at\": [\"ndcg_eval_at\", \"ndcg_at\", \"map_eval_at\", \"map_at\"], "; + str_buf << "\"multi_error_top_k\": [], "; + str_buf << "\"auc_mu_weights\": [], "; + str_buf << "\"num_machines\": [\"num_machine\"], "; + str_buf << "\"local_listen_port\": [\"local_port\", \"port\"], "; + str_buf << "\"time_out\": [], "; + str_buf << "\"machine_list_filename\": [\"machine_list_file\", \"machine_list\", \"mlist\"], "; + str_buf << "\"machines\": [\"workers\", \"nodes\"], "; + str_buf << "\"gpu_platform_id\": [], "; + str_buf << "\"gpu_device_id\": [], "; + str_buf << "\"gpu_use_dp\": [], "; + str_buf << "\"num_gpu\": []"; + str_buf << "}"; + return str_buf.str(); +} + } // namespace LightGBM 
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 40ad062fb8a7..18a8403eba85 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -569,3 +569,14 @@ def test_smoke_custom_parser(tmp_path): with pytest.raises(lgb.basic.LightGBMError, match="Cannot find parser class 'dummy', please register first or check config format"): data.construct() + + +def test_param_aliases(): + # _ConfigAliases.aliases is filled lazily and stays None until get() / get_by_alias() runs, so load the mapping directly instead of relying on an earlier test having populated the cache + aliases = lgb.basic._ConfigAliases._get_all_param_aliases() + assert isinstance(aliases, dict) + assert len(aliases) > 100 + assert all(isinstance(i, set) for i in aliases.values()) + assert all(len(i) >= 1 for i in aliases.values()) + assert all(k in v for k, v in aliases.items()) + assert lgb.basic._ConfigAliases.get('config', 'task') == {'config', 'config_file', 'task', 'task_type'}