From ad1a52770938b9d7c6eb03b95ca77dba83c63e28 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Wed, 16 Dec 2020 16:53:46 -0800
Subject: [PATCH] Enable loading model from <1.0.0 trained with
 objective='binary:logitraw' (#6517)

* Enable loading model from <1.0.0 trained with objective='binary:logitraw'
* Add binary:logitraw in model compatibility testing suite
* Feedback from @trivialfis: Override ProbToMargin() for LogisticRaw

Co-authored-by: Jiaming Yuan
---
 .../tests/helper_scripts/generate_models.R     | 16 +++++----
 .../tests/testthat/test_model_compatibility.R  |  4 +++
 src/objective/regression_loss.h                |  3 ++
 tests/python/generate_models.py                | 34 ++++++++++---------
 tests/python/test_model_compatibility.py       | 11 ++++++
 5 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/R-package/tests/helper_scripts/generate_models.R b/R-package/tests/helper_scripts/generate_models.R
index d38b23a19374..5d64fa6c5f03 100644
--- a/R-package/tests/helper_scripts/generate_models.R
+++ b/R-package/tests/helper_scripts/generate_models.R
@@ -2,7 +2,6 @@
 # of saved model files from XGBoost version 0.90 and 1.0.x.
 library(xgboost)
 library(Matrix)
-source('./generate_models_params.R')
 
 set.seed(0)
 metadata <- list(
@@ -53,11 +52,16 @@ generate_logistic_model <- function () {
   y <- sample(0:1, size = metadata$kRows, replace = TRUE)
   stopifnot(max(y) == 1, min(y) == 0)
 
-  data <- xgb.DMatrix(X, label = y, weight = w)
-  params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
-                 max_depth = metadata$kMaxDepth, objective = 'binary:logistic')
-  booster <- xgb.train(params, data, nrounds = metadata$kRounds)
-  save_booster(booster, 'logit')
+  objective <- c('binary:logistic', 'binary:logitraw')
+  name <- c('logit', 'logitraw')
+
+  for (i in seq_len(length(objective))) {
+    data <- xgb.DMatrix(X, label = y, weight = w)
+    params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
+                   max_depth = metadata$kMaxDepth, objective = objective[i])
+    booster <- xgb.train(params, data, nrounds = metadata$kRounds)
+    save_booster(booster, name[i])
+  }
 }
 
 generate_classification_model <- function () {
diff --git a/R-package/tests/testthat/test_model_compatibility.R b/R-package/tests/testthat/test_model_compatibility.R
index a10fead74ddf..d94f17f29ce7 100644
--- a/R-package/tests/testthat/test_model_compatibility.R
+++ b/R-package/tests/testthat/test_model_compatibility.R
@@ -39,6 +39,10 @@ run_booster_check <- function (booster, name) {
     testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax')
     testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class),
                            metadata$kClasses)
+  } else if (name == 'logitraw') {
+    testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
+    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
+    testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw')
   } else if (name == 'logit') {
     testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
     testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
diff --git a/src/objective/regression_loss.h b/src/objective/regression_loss.h
index 914a6704fd60..5fc7e92b1dd9 100644
--- a/src/objective/regression_loss.h
+++ b/src/objective/regression_loss.h
@@ -162,6 +162,9 @@ struct LogisticRaw : public LogisticRegression {
     predt = common::Sigmoid(predt);
     return std::max(predt * (T(1.0f) - predt), eps);
   }
+  static bst_float ProbToMargin(bst_float base_score) {
+    return base_score;
+  }
   static const char* DefaultEvalMetric() { return "auc"; }
 
   static const char* Name() { return "binary:logitraw"; }
diff --git a/tests/python/generate_models.py b/tests/python/generate_models.py
index 6376d802e659..7b881355e726 100644
--- a/tests/python/generate_models.py
+++ b/tests/python/generate_models.py
@@ -64,22 +64,24 @@ def generate_logistic_model():
     y = np.random.randint(0, 2, size=kRows)
     assert y.max() == 1 and y.min() == 0
 
-    data = xgboost.DMatrix(X, label=y, weight=w)
-    booster = xgboost.train({'tree_method': 'hist',
-                             'num_parallel_tree': kForests,
-                             'max_depth': kMaxDepth,
-                             'objective': 'binary:logistic'},
-                            num_boost_round=kRounds, dtrain=data)
-    booster.save_model(booster_bin('logit'))
-    booster.save_model(booster_json('logit'))
-
-    reg = xgboost.XGBClassifier(tree_method='hist',
-                                num_parallel_tree=kForests,
-                                max_depth=kMaxDepth,
-                                n_estimators=kRounds)
-    reg.fit(X, y, w)
-    reg.save_model(skl_bin('logit'))
-    reg.save_model(skl_json('logit'))
+    for objective, name in [('binary:logistic', 'logit'), ('binary:logitraw', 'logitraw')]:
+        data = xgboost.DMatrix(X, label=y, weight=w)
+        booster = xgboost.train({'tree_method': 'hist',
+                                 'num_parallel_tree': kForests,
+                                 'max_depth': kMaxDepth,
+                                 'objective': objective},
+                                num_boost_round=kRounds, dtrain=data)
+        booster.save_model(booster_bin(name))
+        booster.save_model(booster_json(name))
+
+        reg = xgboost.XGBClassifier(tree_method='hist',
+                                    num_parallel_tree=kForests,
+                                    max_depth=kMaxDepth,
+                                    n_estimators=kRounds,
+                                    objective=objective)
+        reg.fit(X, y, w)
+        reg.save_model(skl_bin(name))
+        reg.save_model(skl_json(name))
 
 
 def generate_classification_model():
diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py
index e02134d6cc71..6f9a184922ab 100644
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -24,6 +24,10 @@ def run_booster_check(booster, name):
             config['learner']['learner_model_param']['base_score']) == 0.5
         assert config['learner']['learner_train_param'][
             'objective'] == 'multi:softmax'
+    elif name.find('logitraw') != -1:
+        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
+        assert config['learner']['learner_model_param']['num_class'] == str(0)
+        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
     elif name.find('logit') != -1:
         assert len(booster.get_dump()) == gm.kForests * gm.kRounds
         assert config['learner']['learner_model_param']['num_class'] == str(0)
@@ -77,6 +81,13 @@ def run_scikit_model_check(name, path):
         assert config['learner']['learner_train_param'][
             'objective'] == 'rank:ndcg'
         run_model_param_check(config)
+    elif name.find('logitraw') != -1:
+        logit = xgboost.XGBClassifier()
+        logit.load_model(path)
+        assert (len(logit.get_booster().get_dump()) ==
+                gm.kRounds * gm.kForests)
+        config = json.loads(logit.get_booster().save_config())
+        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
     elif name.find('logit') != -1:
         logit = xgboost.XGBClassifier()
         logit.load_model(path)
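
Illustrative sketch of why LogisticRaw gets its own ProbToMargin(): for binary:logistic the stored base_score is a probability and is mapped to the margin (log-odds) scale when the model is loaded, while binary:logitraw keeps base_score on the margin scale already, so the override added above is the identity. The standalone C++ below only mirrors that contrast; the logit formula attributed to the base class is an assumption based on how binary:logistic treats base_score (that code is not shown in this diff), and both function names are hypothetical.

#include <cmath>
#include <cstdio>

// Assumed behaviour of the LogisticRegression base class: treat base_score
// as a probability and convert it to log-odds before adding tree outputs.
float LogisticProbToMargin(float base_score) {
  return -std::log(1.0f / base_score - 1.0f);
}

// Behaviour added by this patch for LogisticRaw: base_score is already a
// margin, so loading a pre-1.0.0 binary:logitraw model leaves it untouched.
float LogitRawProbToMargin(float base_score) {
  return base_score;
}

int main() {
  const float base_score = 0.5f;  // XGBoost's default base_score
  std::printf("binary:logistic  %.2f -> margin %.2f\n", base_score,
              LogisticProbToMargin(base_score));
  std::printf("binary:logitraw  %.2f -> margin %.2f\n", base_score,
              LogitRawProbToMargin(base_score));
  return 0;
}

With the identity mapping the stored value passes through unchanged (0.5 stays 0.5), whereas the logistic mapping turns 0.5 into a margin of 0; the override keeps the first behaviour for models whose base_score is already a raw margin rather than a probability.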