Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C API function that returns all parameter names with their aliases #4829

Merged
merged 17 commits into from
Dec 3, 2021
Merged
135 changes: 31 additions & 104 deletions R-package/R/aliases.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,119 +7,46 @@
# [return] A named list, where each key is a parameter relevant to lgb.Dataset and each value is a character
# vector of corresponding aliases.
.DATASET_PARAMETERS <- function() {
return(
list(
"bin_construct_sample_cnt" = c(
"bin_construct_sample_cnt"
, "subsample_for_bin"
)
, "categorical_feature" = c(
"categorical_feature"
, "cat_feature"
, "categorical_column"
, "cat_column"
, "categorical_features"
)
, "data_random_seed" = c(
"data_random_seed"
, "data_seed"
)
, "enable_bundle" = c(
"enable_bundle"
, "is_enable_bundle"
, "bundle"
)
, "feature_pre_filter" = "feature_pre_filter"
, "forcedbins_filename" = "forcedbins_filename"
, "group_column" = c(
"group_column"
, "group"
, "group_id"
, "query_column"
, "query"
, "query_id"
)
, "header" = c(
"header"
, "has_header"
)
, "ignore_column" = c(
"ignore_column"
, "ignore_feature"
, "blacklist"
)
, "is_enable_sparse" = c(
"is_enable_sparse"
, "is_sparse"
, "enable_sparse"
, "sparse"
)
, "label_column" = c(
"label_column"
, "label"
)
, "linear_tree" = c(
"linear_tree"
, "linear_trees"
)
, "max_bin" = c(
"max_bin"
, "max_bins"
)
, "max_bin_by_feature" = "max_bin_by_feature"
, "min_data_in_bin" = "min_data_in_bin"
, "pre_partition" = c(
"pre_partition"
, "is_pre_partition"
)
, "precise_float_parser" = "precise_float_parser"
, "two_round" = c(
"two_round"
, "two_round_loading"
, "use_two_round_loading"
)
, "use_missing" = "use_missing"
, "weight_column" = c(
"weight_column"
, "weight"
)
, "zero_as_missing" = "zero_as_missing"
)
)
all_aliases <- .PARAMETER_ALIASES()
return(all_aliases[c(
"bin_construct_sample_cnt"
, "categorical_feature"
, "data_random_seed"
, "enable_bundle"
, "feature_pre_filter"
, "forcedbins_filename"
, "group_column"
, "header"
, "ignore_column"
, "is_enable_sparse"
, "label_column"
, "linear_tree"
, "max_bin"
, "max_bin_by_feature"
, "min_data_in_bin"
, "pre_partition"
, "precise_float_parser"
, "two_round"
, "use_missing"
, "weight_column"
, "zero_as_missing"
)])
}

# [description] List of respected parameter aliases. Wrapped in a function to take advantage of
# lazy evaluation (so it doesn't matter what order R sources files during installation).
# [return] A named list, where each key is a main LightGBM parameter and each value is a character
# vector of corresponding aliases.
.PARAMETER_ALIASES <- function() {
learning_params <- list(
"boosting" = c(
"boosting"
, "boost"
, "boosting_type"
)
, "early_stopping_round" = c(
"early_stopping_round"
, "early_stopping_rounds"
, "early_stopping"
, "n_iter_no_change"
)
, "num_iterations" = c(
"num_iterations"
, "num_iteration"
, "n_iter"
, "num_tree"
, "num_trees"
, "num_round"
, "num_rounds"
, "nrounds"
, "num_boost_round"
, "n_estimators"
, "max_iter"
aliases <- jsonlite::fromJSON(
StrikerRUS marked this conversation as resolved.
Show resolved Hide resolved
.Call(
LGBM_DumpParamAliases_R
)
)
return(c(learning_params, .DATASET_PARAMETERS()))
for (alias in names(aliases)) {
aliases[[alias]] <- c(aliases[[alias]], alias)
}
StrikerRUS marked this conversation as resolved.
Show resolved Hide resolved
return(aliases)
}

# [description]
Expand Down
21 changes: 21 additions & 0 deletions R-package/src/lightgbm_R.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,26 @@ SEXP LGBM_BoosterDumpModel_R(SEXP handle,
R_API_END();
}

// Dump all LightGBM parameter names together with their aliases as a JSON string.
// Takes no arguments. Returns an R character vector of length 1 holding the JSON
// text produced by LGBM_DumpParamAliases().
SEXP LGBM_DumpParamAliases_R() {
// Continuation token handed to the safe_R_* helpers below; it is PROTECTed here
// and released by the UNPROTECT(2) before the return.
SEXP cont_token = PROTECT(R_MakeUnwindCont());
// NOTE(review): R_API_BEGIN / R_API_END / CHECK_CALL are macros defined elsewhere;
// presumably they wrap the body in the package's error handling - confirm.
R_API_BEGIN();
SEXP aliases_str;
int64_t out_len = 0;
// First attempt uses a 1 MiB (1024 * 1024 bytes) buffer.
int64_t buf_len = 1024 * 1024;
std::vector<char> inner_char_buf(buf_len);
// Arguments are (buffer length, [out] actual length, [out] destination buffer).
CHECK_CALL(LGBM_DumpParamAliases(buf_len, &out_len, inner_char_buf.data()));
// if aliases string was larger than the initial buffer, allocate a bigger buffer and try again
if (out_len > buf_len) {
inner_char_buf.resize(out_len);
CHECK_CALL(LGBM_DumpParamAliases(out_len, &out_len, inner_char_buf.data()));
}
// Build the length-1 character vector and store the buffer contents as its only element.
// NOTE(review): this reads the buffer as a C string, so it relies on
// LGBM_DumpParamAliases() NUL-terminating its output - confirm.
aliases_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
SET_STRING_ELT(aliases_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
// Balances the two PROTECT calls above (cont_token and aliases_str).
UNPROTECT(2);
return aliases_str;
R_API_END();
}

// .Call() calls
static const R_CallMethodDef CallEntries[] = {
{"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1},
Expand Down Expand Up @@ -916,6 +936,7 @@ static const R_CallMethodDef CallEntries[] = {
{"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4},
{"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3},
{"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3},
{"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0},
{NULL, NULL, 0}
};

Expand Down
6 changes: 6 additions & 0 deletions R-package/src/lightgbm_R.h
Original file line number Diff line number Diff line change
Expand Up @@ -596,4 +596,10 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
SEXP feature_importance_type
);

/*!
* \brief Dump parameter aliases to JSON
* \return R character vector (length=1) with aliases JSON
*/
LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();

#endif // LIGHTGBM_R_H_
1 change: 1 addition & 0 deletions R-package/tests/testthat/test_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ context("parameter aliases")
test_that(".PARAMETER_ALIASES() returns a named list of character vectors, where names are unique", {
param_aliases <- .PARAMETER_ALIASES()
expect_identical(class(param_aliases), "list")
expect_true(length(param_aliases) > 100)
expect_true(is.character(names(param_aliases)))
expect_true(is.character(param_aliases[["boosting"]]))
expect_true(is.character(param_aliases[["early_stopping_round"]]))
Expand Down
18 changes: 18 additions & 0 deletions helpers/parameter_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file.
"""
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -291,6 +292,7 @@ def gen_parameter_code(
keys, infos = get_parameter_infos(config_hpp)
names = get_names(infos)
alias = get_alias(infos)
names_with_aliases = defaultdict(list)
str_to_write = r"""/*!
* Copyright (c) 2018 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for license information.
Expand All @@ -306,6 +308,7 @@ def gen_parameter_code(

for pair in alias:
str_to_write += f' {{"{pair[0]}", "{pair[1]}"}},\n'
names_with_aliases[pair[1]].append(pair[0])
str_to_write += " });\n"
str_to_write += " return aliases;\n"
str_to_write += "}\n\n"
Expand Down Expand Up @@ -353,6 +356,21 @@ def gen_parameter_code(
# tails
str_to_write += " return str_buf.str();\n"
str_to_write += "}\n\n"

str_to_write += "std::string Config::DumpAliases() const {\n"
str_to_write += " std::stringstream str_buf;\n"
str_to_write += ' str_buf << "{";\n'
for idx, name in enumerate(names):
if idx > 0:
str_to_write += ', ";\n'
aliases = '\\", \\"'.join([alias for alias in names_with_aliases[name]])
aliases = f'[\\"{aliases}\\"]' if aliases else '[]'
str_to_write += f' str_buf << "\\"{name}\\": {aliases}'
str_to_write += '";\n'
str_to_write += ' str_buf << "}";\n'
str_to_write += " return str_buf.str();\n"
str_to_write += "}\n\n"

str_to_write += "} // namespace LightGBM\n"
with open(config_out_cpp, "w") as config_out_cpp_file:
config_out_cpp_file.write(str_to_write)
Expand Down
11 changes: 11 additions & 0 deletions include/LightGBM/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */
*/
LIGHTGBM_C_EXPORT const char* LGBM_GetLastError();

/*!
* \brief Dump all parameter names with their aliases to JSON.
* \param buffer_len String buffer length; if ``buffer_len < out_len``, you should re-allocate the buffer and call again
* \param[out] out_len Actual output length
* \param[out] out_str JSON format string of parameters, should pre-allocate memory
* \return 0 when succeed, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
int64_t* out_len,
char* out_str);

/*!
* \brief Register a callback function for log redirecting.
* \param callback The callback function to register
Expand Down
1 change: 1 addition & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ struct Config {
static const std::unordered_set<std::string>& parameter_set();
std::vector<std::vector<double>> auc_mu_weights_matrix;
std::vector<std::vector<int>> interaction_constraints_vector;
static std::string DumpAliases();
StrikerRUS marked this conversation as resolved.
Show resolved Hide resolved

private:
void CheckParamConflict();
Expand Down
111 changes: 21 additions & 90 deletions python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,96 +324,27 @@ class LGBMDeprecationWarning(UserWarning):


class _ConfigAliases:
aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt",
"subsample_for_bin"},
"boosting": {"boosting",
"boosting_type",
"boost"},
"categorical_feature": {"categorical_feature",
"cat_feature",
"categorical_column",
"cat_column",
"categorical_features"},
"data_random_seed": {"data_random_seed",
"data_seed"},
"early_stopping_round": {"early_stopping_round",
"early_stopping_rounds",
"early_stopping",
"n_iter_no_change"},
"enable_bundle": {"enable_bundle",
"is_enable_bundle",
"bundle"},
"eval_at": {"eval_at",
"ndcg_eval_at",
"ndcg_at",
"map_eval_at",
"map_at"},
"group_column": {"group_column",
"group",
"group_id",
"query_column",
"query",
"query_id"},
"header": {"header",
"has_header"},
"ignore_column": {"ignore_column",
"ignore_feature",
"blacklist"},
"is_enable_sparse": {"is_enable_sparse",
"is_sparse",
"enable_sparse",
"sparse"},
"label_column": {"label_column",
"label"},
"linear_tree": {"linear_tree",
"linear_trees"},
"local_listen_port": {"local_listen_port",
"local_port",
"port"},
"machines": {"machines",
"workers",
"nodes"},
"max_bin": {"max_bin",
"max_bins"},
"metric": {"metric",
"metrics",
"metric_types"},
"num_class": {"num_class",
"num_classes"},
"num_iterations": {"num_iterations",
"num_iteration",
"n_iter",
"num_tree",
"num_trees",
"num_round",
"num_rounds",
"nrounds",
"num_boost_round",
"n_estimators",
"max_iter"},
"num_machines": {"num_machines",
"num_machine"},
"num_threads": {"num_threads",
"num_thread",
"nthread",
"nthreads",
"n_jobs"},
"objective": {"objective",
"objective_type",
"app",
"application",
"loss"},
"pre_partition": {"pre_partition",
"is_pre_partition"},
"tree_learner": {"tree_learner",
"tree",
"tree_type",
"tree_learner_type"},
"two_round": {"two_round",
"two_round_loading",
"use_two_round_loading"},
"weight_column": {"weight_column",
"weight"}}
buffer_len = 1 << 20
tmp_out_len = ctypes.c_int64(0)
string_buffer = ctypes.create_string_buffer(buffer_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_DumpParamAliases(
ctypes.c_int64(buffer_len),
ctypes.byref(tmp_out_len),
ptr_string_buffer))
actual_len = tmp_out_len.value
# if buffer length is not long enough, re-allocate a buffer
if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
_safe_call(_LIB.LGBM_DumpParamAliases(
ctypes.c_int64(actual_len),
ctypes.byref(tmp_out_len),
ptr_string_buffer))
aliases = json.loads(
string_buffer.value.decode('utf-8'),
object_hook=lambda obj: {k: set(v) | {k} for k, v in obj.items()}
)

@classmethod
def get(cls, *args):
Expand Down
Loading