From 91a8d8f80a67b10e9765de0c81c659e2d5f2660d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 31 Jan 2020 01:21:26 -0800 Subject: [PATCH 01/10] Fix loading old logit model. * Remove saving metrics. * Add version as an attribute. * Remove the size check in R test to relax the size constraint. * Add missing R doc for passing linting. Run devtools. * Cleanup old model IO logic. * Test compatibility on CI. --- R-package/R/xgb.Booster.R | 2 + R-package/man/agaricus.test.Rd | 6 +- R-package/man/agaricus.train.Rd | 6 +- R-package/man/predict.xgb.Booster.Rd | 3 + .../tests/testthat/test_custom_objective.R | 2 - doc/python/convert_090to100.py | 76 +++++++++ doc/tutorials/saving_model.rst | 7 +- include/xgboost/learner.h | 2 + python-package/xgboost/core.py | 99 ++++++----- python-package/xgboost/dask.py | 1 + src/learner.cc | 159 +++++++++--------- tests/ci_build/Dockerfile.cpu | 5 +- tests/python/generate_models.py | 26 ++- tests/python/models/version | 1 - .../models/xgboost-1.0.0-SNAPSHOT.cls.bin | Bin 4891 -> 0 bytes .../models/xgboost-1.0.0-SNAPSHOT.cls.json | 1 - .../models/xgboost-1.0.0-SNAPSHOT.ltr.bin | Bin 1799 -> 0 bytes .../models/xgboost-1.0.0-SNAPSHOT.ltr.json | 1 - .../models/xgboost-1.0.0-SNAPSHOT.reg.bin | Bin 1950 -> 0 bytes .../models/xgboost-1.0.0-SNAPSHOT.reg.json | 1 - .../xgboost_scikit-1.0.0-SNAPSHOT.cls.bin | Bin 5641 -> 0 bytes .../xgboost_scikit-1.0.0-SNAPSHOT.cls.json | 1 - .../xgboost_scikit-1.0.0-SNAPSHOT.ltr.bin | Bin 2445 -> 0 bytes .../xgboost_scikit-1.0.0-SNAPSHOT.ltr.json | 1 - .../xgboost_scikit-1.0.0-SNAPSHOT.reg.bin | Bin 2606 -> 0 bytes .../xgboost_scikit-1.0.0-SNAPSHOT.reg.json | 1 - tests/python/test_basic.py | 2 +- tests/python/test_basic_models.py | 23 ++- tests/python/test_model_compatibility.py | 141 +++++++++++++--- 29 files changed, 397 insertions(+), 170 deletions(-) create mode 100644 doc/python/convert_090to100.py delete mode 100644 tests/python/models/version delete mode 100644 tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.bin delete mode 100644 tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.json delete mode 100644 tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.bin delete mode 100644 tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.json delete mode 100644 tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.bin delete mode 100644 tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.json delete mode 100644 tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.bin delete mode 100644 tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.json delete mode 100644 tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.bin delete mode 100644 tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.json delete mode 100644 tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.reg.bin delete mode 100644 tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.reg.json diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index f18632500207..660264e0b7b7 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -139,6 +139,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { #' @param reshape whether to reshape the vector of predictions to a matrix form when there are several #' prediction outputs per case. This option has no effect when either of predleaf, predcontrib, #' or predinteraction flags is TRUE. +#' @param training whether is the prediction result used for training. For dart booster, +#' training predicting will perform dropout. #' @param ... 
Parameters passed to \code{predict.xgb.Booster} #' #' @details diff --git a/R-package/man/agaricus.test.Rd b/R-package/man/agaricus.test.Rd index 041ff4e6c813..b88b340966dc 100644 --- a/R-package/man/agaricus.test.Rd +++ b/R-package/man/agaricus.test.Rd @@ -4,7 +4,7 @@ \name{agaricus.test} \alias{agaricus.test} \title{Test part from Mushroom Data Set} -\format{A list containing a label vector, and a dgCMatrix object with 1611 +\format{A list containing a label vector, and a dgCMatrix object with 1611 rows and 126 variables} \usage{ data(agaricus.test) @@ -24,8 +24,8 @@ This data set includes the following fields: \references{ https://archive.ics.uci.edu/ml/datasets/Mushroom -Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository -[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository +[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. } \keyword{datasets} diff --git a/R-package/man/agaricus.train.Rd b/R-package/man/agaricus.train.Rd index 0c08e8080de1..6df609699dd9 100644 --- a/R-package/man/agaricus.train.Rd +++ b/R-package/man/agaricus.train.Rd @@ -4,7 +4,7 @@ \name{agaricus.train} \alias{agaricus.train} \title{Training part from Mushroom Data Set} -\format{A list containing a label vector, and a dgCMatrix object with 6513 +\format{A list containing a label vector, and a dgCMatrix object with 6513 rows and 127 variables} \usage{ data(agaricus.train) @@ -24,8 +24,8 @@ This data set includes the following fields: \references{ https://archive.ics.uci.edu/ml/datasets/Mushroom -Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository -[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository +[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. } \keyword{datasets} diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 69b48cd15bba..6430eabf5c63 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -49,6 +49,9 @@ It will use all the trees by default (\code{NULL} value).} prediction outputs per case. This option has no effect when either of predleaf, predcontrib, or predinteraction flags is TRUE.} +\item{training}{whether is the prediction result used for training. 
For dart booster,
+training predicting will perform dropout.}
+
 \item{...}{Parameters passed to \code{predict.xgb.Booster}}
 }
 \value{
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index 79d8eccf8795..5e40a9b8a8b1 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -31,7 +31,6 @@ num_round <- 2
 test_that("custom objective works", {
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
-  expect_equal(length(bst$raw), 1100)
   expect_false(is.null(bst$evaluation_log))
   expect_false(is.null(bst$evaluation_log$eval_error))
   expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
@@ -58,5 +57,4 @@ test_that("custom objective using DMatrix attr works", {
   param$objective = logregobjattr
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
-  expect_equal(length(bst$raw), 1100)
 })
diff --git a/doc/python/convert_090to100.py b/doc/python/convert_090to100.py
new file mode 100644
index 000000000000..21aa0fcdc7db
--- /dev/null
+++ b/doc/python/convert_090to100.py
@@ -0,0 +1,76 @@
+'''This is a simple script that converts a pickled XGBoost
+Scikit-Learn interface object from 0.90 to a native model.  Pickle
+format is not stable as it's a direct serialization of a Python object.
+We advise not to use it when stability is needed.
+
+'''
+import pickle
+import json
+import os
+import argparse
+import numpy as np
+import xgboost
+import warnings
+
+
+def save_label_encoder(le):
+    '''Save the label encoder in XGBClassifier'''
+    meta = dict()
+    for k, v in le.__dict__.items():
+        if isinstance(v, np.ndarray):
+            meta[k] = v.tolist()
+        else:
+            meta[k] = v
+    return meta
+
+
+def xgboost_skl_90to100(skl_model):
+    '''Extract the model and related metadata from the SKL model.'''
+    model = {}
+    with open(skl_model, 'rb') as fd:
+        old = pickle.load(fd)
+    if not isinstance(old, xgboost.XGBModel):
+        raise TypeError(
+            'The script only handles Scikit-Learn interface objects')
+
+    # Save Scikit-Learn specific Python attributes into a JSON document.
+    for k, v in old.__dict__.items():
+        if k == '_le':
+            model[k] = save_label_encoder(v)
+        elif k == 'classes_':
+            model[k] = v.tolist()
+        elif k == '_Booster':
+            continue
+        else:
+            try:
+                json.dumps({k: v})
+                model[k] = v
+            except TypeError:
+                warnings.warn(str(k) + ' is not saved in Scikit-Learn meta.')
+    booster = old.get_booster()
+    # Store the JSON serialization as an attribute.
+    booster.set_attr(scikit_learn=json.dumps(model))
+
+    # Save it into a native model.
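+    # Pick the first unused file name of the form
+    # `<i>_xgboost_model_from_old_pickle.model` so that an existing file is
+    # never overwritten.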
+    i = 0
+    while True:
+        path = str(i) + '_xgboost_model_from_old_pickle.model'
+        if os.path.exists(path):
+            i += 1
+            continue
+        booster.save_model(path)
+        break
+
+
+if __name__ == '__main__':
+    assert xgboost.__version__ != '1.0.0', ('Please use the XGBoost version'
+                                            ' that generates this pickle.')
+    parser = argparse.ArgumentParser(
+        description=('A simple script to convert pickle generated by'
+                     ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).')
+    )
+    parser.add_argument('--old-pickle', type=str,
+                        help='Path to old pickle file.')
+    args = parser.parse_args()
+
+    xgboost_skl_90to100(args.old_pickle)
diff --git a/doc/tutorials/saving_model.rst b/doc/tutorials/saving_model.rst
index aa3b41e6b598..7187452774ec 100644
--- a/doc/tutorials/saving_model.rst
+++ b/doc/tutorials/saving_model.rst
@@ -91,7 +91,12 @@ Loading pickled file from different version of XGBoost
 
 As noted, pickled model is neither portable nor stable, but in some cases the pickled
 models are valuable.  One way to restore it in the future is to load it back with that
-specific version of Python and XGBoost, export the model by calling `save_model`.
+specific version of Python and XGBoost, export the model by calling `save_model`.  To help
+ease the migration, we created a simple script for converting a pickled XGBoost 0.90
+Scikit-Learn interface object to an XGBoost 1.0.0 native model.  Please note that the script
+suits simple use cases, and it's advised not to use pickle when stability is needed.
+It's located in ``xgboost/doc/python`` with the name ``convert_090to100.py``.  See
+comments in the script for more details.
 
 ********************************************************
 Saving and Loading the internal parameters configuration
diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
index 1f78382c7da3..112ed7a571f0 100644
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -208,6 +208,8 @@ struct LearnerModelParam {
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
   LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
+  /* \brief Whether this parameter is initialized with LearnerModelParamLegacy.
*/ + bool Initialized() const { return num_feature != 0; } }; } // namespace xgboost diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index f134c0399999..b7baf22ad76d 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -896,11 +896,12 @@ def slice(self, rindex, allow_groups=False): res = DMatrix(None, feature_names=self.feature_names, feature_types=self.feature_types) res.handle = ctypes.c_void_p() - _check_call(_LIB.XGDMatrixSliceDMatrixEx(self.handle, - c_array(ctypes.c_int, rindex), - c_bst_ulong(len(rindex)), - ctypes.byref(res.handle), - ctypes.c_int(1 if allow_groups else 0))) + _check_call(_LIB.XGDMatrixSliceDMatrixEx( + self.handle, + c_array(ctypes.c_int, rindex), + c_bst_ulong(len(rindex)), + ctypes.byref(res.handle), + ctypes.c_int(1 if allow_groups else 0))) return res @property @@ -954,7 +955,8 @@ def feature_names(self, feature_names): if not all(isinstance(f, STRING_TYPES) and not any(x in f for x in set(('[', ']', '<'))) for f in feature_names): - raise ValueError('feature_names must be string, and may not contain [, ] or <') + raise ValueError('feature_names must be string, and may not ' + 'contain [, ] or <') else: # reset feature_types also self.feature_types = None @@ -996,7 +998,8 @@ def feature_types(self, feature_types): valid = ('int', 'float', 'i', 'q') if not all(isinstance(f, STRING_TYPES) and f in valid for f in feature_types): - raise ValueError('All feature_names must be {int, float, i, q}') + raise ValueError( + 'All feature_names must be {int, float, i, q}') self._feature_types = feature_types @@ -1024,7 +1027,8 @@ def __init__(self, params=None, cache=(), model_file=None): """ for d in cache: if not isinstance(d, DMatrix): - raise TypeError('invalid cache item: {}'.format(type(d).__name__), cache) + raise TypeError('invalid cache item: {}'.format( + type(d).__name__), cache) self._validate_features(d) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) @@ -1033,7 +1037,7 @@ def __init__(self, params=None, cache=(), model_file=None): ctypes.byref(self.handle))) if isinstance(params, dict) and \ - 'validate_parameters' not in params.keys(): + 'validate_parameters' not in params.keys(): params['validate_parameters'] = 1 self.set_param(params or {}) if (params is not None) and ('booster' in params): @@ -1162,7 +1166,8 @@ def attr(self, key): Returns ------- value : str - The attribute value of the key, returns None if attribute do not exist. + The attribute value of the key, returns None if attribute do not + exist. """ ret = ctypes.c_char_p() success = ctypes.c_int() @@ -1177,8 +1182,8 @@ def attributes(self): Returns ------- - result : dictionary of attribute_name: attribute_value pairs of strings. - Returns an empty dict if there's no attributes. + result : dictionary of attribute_name: attribute_value pairs of + strings. Returns an empty dict if there's no attributes. """ length = c_bst_ulong() sarr = ctypes.POINTER(ctypes.c_char_p)() @@ -1194,7 +1199,8 @@ def set_attr(self, **kwargs): Parameters ---------- **kwargs - The attributes to set. Setting a value to None deletes an attribute. + The attributes to set. Setting a value to None deletes an + attribute. 
""" for key, value in kwargs.items(): if value is not None: @@ -1267,9 +1273,11 @@ def boost(self, dtrain, grad, hess): """ if len(grad) != len(hess): - raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) + raise ValueError('grad / hess length mismatch: {} / {}'.format( + len(grad), len(hess))) if not isinstance(dtrain, DMatrix): - raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) + raise TypeError('invalid training matrix: {}'.format( + type(dtrain).__name__)) self._validate_features(dtrain) _check_call(_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, @@ -1619,14 +1627,16 @@ def get_fscore(self, fmap=''): .. note:: Feature importance is defined only for tree boosters - Feature importance is only defined when the decision tree model is chosen as base - learner (`booster=gbtree`). It is not defined for other base learner types, such - as linear learners (`booster=gblinear`). + Feature importance is only defined when the decision tree model is + chosen as base learner (`booster=gbtree`). It is not defined for + other base learner types, such as linear learners + (`booster=gblinear`). .. note:: Zero-importance features will not be included - Keep in mind that this function does not include zero-importance feature, i.e. - those features that have not been used in any split conditions. + Keep in mind that this function does not include zero-importance + feature, i.e. those features that have not been used in any split + conditions. Parameters ---------- @@ -1640,17 +1650,22 @@ def get_score(self, fmap='', importance_type='weight'): """Get feature importance of each feature. Importance type can be defined as: - * 'weight': the number of times a feature is used to split the data across all trees. + * 'weight': the number of times a feature is used to split the data + across all trees. * 'gain': the average gain across all splits the feature is used in. - * 'cover': the average coverage across all splits the feature is used in. - * 'total_gain': the total gain across all splits the feature is used in. - * 'total_cover': the total coverage across all splits the feature is used in. + * 'cover': the average coverage across all splits the feature is used + in. + * 'total_gain': the total gain across all splits the feature is used + in. + * 'total_cover': the total coverage across all splits the feature is + used in. .. note:: Feature importance is defined only for tree boosters - Feature importance is only defined when the decision tree model is chosen as base - learner (`booster=gbtree`). It is not defined for other base learner types, such - as linear learners (`booster=gblinear`). + Feature importance is only defined when the decision tree + model is chosen as base learner (`booster=gbtree`). It is + not defined for other base learner types, such as linear + learners (`booster=gblinear`). Parameters ---------- @@ -1658,13 +1673,17 @@ def get_score(self, fmap='', importance_type='weight'): The name of feature map file. importance_type: str, default 'weight' One of the importance types defined above. 
+ """ fmap = os_fspath(fmap) - if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}: - raise ValueError('Feature importance is not defined for Booster type {}' - .format(self.booster)) + if getattr(self, 'booster', None) is not None and self.booster not in { + 'gbtree', 'dart'}: + raise ValueError( + 'Feature importance is not defined for Booster type {}' + .format(self.booster)) - allowed_importance_types = ['weight', 'gain', 'cover', 'total_gain', 'total_cover'] + allowed_importance_types = ['weight', 'gain', 'cover', 'total_gain', + 'total_cover'] if importance_type not in allowed_importance_types: msg = ("importance_type mismatch, got '{}', expected one of " + repr(allowed_importance_types)) @@ -1716,7 +1735,8 @@ def get_score(self, fmap='', importance_type='weight'): if len(arr) == 1: continue - # look for the closing bracket, extract only info within that bracket + # look for the closing bracket, extract only info within that + # bracket fid = arr[1].split(']') # extract gain or cover from string after closing bracket @@ -1743,9 +1763,9 @@ def get_score(self, fmap='', importance_type='weight'): def trees_to_dataframe(self, fmap=''): """Parse a boosted tree model text dump into a pandas DataFrame structure. - This feature is only defined when the decision tree model is chosen as base - learner (`booster in {gbtree, dart}`). It is not defined for other base learner - types, such as linear learners (`booster=gblinear`). + This feature is only defined when the decision tree model is chosen as + base learner (`booster in {gbtree, dart}`). It is not defined for other + base learner types, such as linear learners (`booster=gblinear`). Parameters ---------- @@ -1758,7 +1778,8 @@ def trees_to_dataframe(self, fmap=''): raise Exception(('pandas must be available to use this method.' 
'Install pandas before calling again.')) - if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}: + if getattr(self, 'booster', None) is not None and self.booster not in { + 'gbtree', 'dart'}: raise ValueError('This method is not defined for Booster type {}' .format(self.booster)) @@ -1814,7 +1835,8 @@ def trees_to_dataframe(self, fmap=''): gains.append(float(stats[7])) covers.append(float(stats[9])) - ids = [str(t_id) + '-' + str(n_id) for t_id, n_id in zip(tree_ids, node_ids)] + ids = [str(t_id) + '-' + str(n_id) + for t_id, n_id in zip(tree_ids, node_ids)] df = DataFrame({'Tree': tree_ids, 'Node': node_ids, 'ID': ids, 'Feature': fids, 'Split': splits, 'Yes': y_directs, 'No': n_directs, 'Missing': missings, 'Gain': gains, @@ -1894,5 +1916,6 @@ def get_split_value_histogram(self, feature, fmap='', bins=None, return DataFrame(nph, columns=['SplitValue', 'Count']) if as_pandas and not PANDAS_INSTALLED: sys.stderr.write( - "Returning histogram as ndarray (as_pandas == True, but pandas is not installed).") + "Returning histogram as ndarray (as_pandas == True, but pandas" + " is not installed).") return nph diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 3b5c8ff594f9..fde1b1657663 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -600,6 +600,7 @@ def fit(self, results = train(self.client, params, dtrain, num_boost_round=self.get_num_boosting_rounds(), evals=evals) + # pylint: disable=attribute-defined-outside-init self._Booster = results['booster'] # pylint: disable=attribute-defined-outside-init self.evals_result_ = results['history'] diff --git a/src/learner.cc b/src/learner.cc index 70ffceda67c7..ea9d610f50f2 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -1,5 +1,5 @@ /*! - * Copyright 2014-2019 by Contributors + * Copyright 2014-2020 by Contributors * \file learner.cc * \brief Implementation of learning algorithm. * \author Tianqi Chen @@ -117,8 +117,9 @@ LearnerModelParam::LearnerModelParam( LearnerModelParamLegacy const &user_param, float base_margin) : base_score{base_margin}, num_feature{user_param.num_feature}, num_output_group{user_param.num_class == 0 - ? 1 - : static_cast(user_param.num_class)} {} + ? 1 + : static_cast(user_param.num_class)} +{} struct LearnerTrainParam : public XGBoostParameter { // data split mode, can be row, col, or none. @@ -140,7 +141,7 @@ struct LearnerTrainParam : public XGBoostParameter { .describe("Data split mode for distributed training."); DMLC_DECLARE_FIELD(disable_default_eval_metric) .set_default(0) - .describe("flag to disable default metric. Set to >0 to disable"); + .describe("Flag to disable default metric. 
Set to >0 to disable"); DMLC_DECLARE_FIELD(booster) .set_default("gbtree") .describe("Gradient booster used for training."); @@ -200,6 +201,7 @@ class LearnerImpl : public Learner { Args args = {cfg_.cbegin(), cfg_.cend()}; tparam_.UpdateAllowUnknown(args); + auto mparam_backup = mparam_; mparam_.UpdateAllowUnknown(args); generic_parameters_.UpdateAllowUnknown(args); generic_parameters_.CheckDeprecated(); @@ -217,17 +219,33 @@ class LearnerImpl : public Learner { // set seed only before the model is initialized common::GlobalRandom().seed(generic_parameters_.seed); + // must precede configure gbm since num_features is required for gbm this->ConfigureNumFeatures(); args = {cfg_.cbegin(), cfg_.cend()}; // renew this->ConfigureObjective(old_tparam, &args); - this->ConfigureGBM(old_tparam, args); - this->ConfigureMetrics(args); + // Before 1.0.0, we save `base_score` into binary as a transformed value by objective. + // After 1.0.0 we save the value provided by user and keep it immutable instead. To + // keep the stability, we initialize it in binary LoadModel instead of configuration. + // Under what condition should we omit the transformation: + // + // - base_score is loaded from old binary model. + // + // What are the other possible conditions: + // + // - model loaded from new binary or JSON. + // - model is created from scratch. + // - model is configured second time due to change of parameter + if (!learner_model_param_.Initialized() || mparam_.base_score != mparam_backup.base_score) { + learner_model_param_ = LearnerModelParam(mparam_, + obj_->ProbToMargin(mparam_.base_score)); + } + + this->ConfigureGBM(old_tparam, args); generic_parameters_.ConfigureGpuId(this->gbm_->UseGPU()); - learner_model_param_ = LearnerModelParam(mparam_, - obj_->ProbToMargin(mparam_.base_score)); + this->ConfigureMetrics(args); this->need_configuration_ = false; if (generic_parameters_.validate_parameters) { @@ -337,9 +355,6 @@ class LearnerImpl : public Learner { cache_)); gbm_->LoadModel(gradient_booster); - learner_model_param_ = LearnerModelParam(mparam_, - obj_->ProbToMargin(mparam_.base_score)); - auto const& j_attributes = get(learner.at("attributes")); attributes_.clear(); for (auto const& kv : j_attributes) { @@ -459,6 +474,7 @@ class LearnerImpl : public Learner { } if (header[0] == '{') { + // Dispatch to JSON auto json_stream = common::FixedSizeStream(&fp); std::string buffer; json_stream.Take(&buffer); @@ -471,25 +487,9 @@ class LearnerImpl : public Learner { // read parameter CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_)) << "BoostLearner: wrong model format"; - { - // backward compatibility code for compatible with old model type - // for new model, Read(&name_obj_) is suffice - uint64_t len; - CHECK_EQ(fi->Read(&len, sizeof(len)), sizeof(len)); - if (len >= std::numeric_limits::max()) { - int gap; - CHECK_EQ(fi->Read(&gap, sizeof(gap)), sizeof(gap)) - << "BoostLearner: wrong model format"; - len = len >> static_cast(32UL); - } - if (len != 0) { - tparam_.objective.resize(len); - CHECK_EQ(fi->Read(&tparam_.objective[0], len), len) - << "BoostLearner: wrong model format"; - } - } + CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format"; - // duplicated code with LazyInitModel + obj_.reset(ObjFunction::Create(tparam_.objective, &generic_parameters_)); gbm_.reset(GradientBooster::Create(tparam_.booster, &generic_parameters_, &learner_model_param_, cache_)); @@ -508,17 +508,38 @@ class 
LearnerImpl : public Learner { } attributes_ = std::map(attr.begin(), attr.end()); } - if (tparam_.objective == "count:poisson") { - std::string max_delta_step; - fi->Read(&max_delta_step); - cfg_["max_delta_step"] = max_delta_step; + bool warn_old_model { false }; + if (attributes_.find("count_poisson_max_delta_step") != attributes_.cend()) { + // Loading model from < 1.0.0, objective is not saved. + cfg_["max_delta_step"] = attributes_["count_poisson_max_delta_step"]; + attributes_.erase("count_poisson_max_delta_step"); + } else { + warn_old_model = true; } - if (mparam_.contain_eval_metrics != 0) { - std::vector metr; - fi->Read(&metr); - for (auto name : metr) { - metrics_.emplace_back(Metric::Create(name, &generic_parameters_)); - } + + if (attributes_.find("version") != attributes_.cend()) { + learner_model_param_ = LearnerModelParam(mparam_, + obj_->ProbToMargin(mparam_.base_score)); + attributes_.erase("version"); + } else { + // Before 1.0.0, base_score is saved as a transformed value, and there's no version + // attribute in the saved model. + learner_model_param_ = LearnerModelParam(mparam_, mparam_.base_score); + warn_old_model = true; + } + if (attributes_.find("objective") != attributes_.cend()) { + auto obj_str = attributes_.at("objective"); + auto j_obj = Json::Load({obj_str.c_str(), obj_str.size()}); + obj_->LoadConfig(j_obj); + attributes_.erase("objective"); + } else { + // Similar to JSON model IO, we save the objective. + warn_old_model = true; + } + + if (warn_old_model) { + LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it " + "again for improved compatibility"; } cfg_["num_class"] = common::ToString(mparam_.num_class); @@ -527,15 +548,6 @@ class LearnerImpl : public Learner { auto n = tparam_.__DICT__(); cfg_.insert(n.cbegin(), n.cend()); - Args args = {cfg_.cbegin(), cfg_.cend()}; - generic_parameters_.UpdateAllowUnknown(args); - gbm_->Configure(args); - obj_->Configure({cfg_.begin(), cfg_.end()}); - - for (auto& p_metric : metrics_) { - p_metric->Configure({cfg_.begin(), cfg_.end()}); - } - // copy dsplit from config since it will not run again during restore if (tparam_.dsplit == DataSplitMode::kAuto && rabit::IsDistributed()) { tparam_.dsplit = DataSplitMode::kRow; @@ -552,15 +564,9 @@ class LearnerImpl : public Learner { void SaveModel(dmlc::Stream* fo) const override { LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify std::vector > extra_attr; - // extra attributed to be added just before saving - if (tparam_.objective == "count:poisson") { - auto it = cfg_.find("max_delta_step"); - if (it != cfg_.end()) { - // write `max_delta_step` parameter as extra attribute of booster - mparam.contain_extra_attrs = 1; - extra_attr.emplace_back("count_poisson_max_delta_step", it->second); - } - } + mparam.contain_extra_attrs = 1; + extra_attr.emplace_back(std::make_pair("version", Version::String(Version::Self()))); + { std::vector saved_params; // check if rabit_bootstrap_cache were set to non zero before adding to checkpoint @@ -577,6 +583,14 @@ class LearnerImpl : public Learner { } } } + { + // Save the objective. 
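+      // The objective configuration is dumped to JSON and stored as the
+      // "objective" attribute of the binary model, mirroring the JSON model IO.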
+ Json j_obj { Object() }; + obj_->SaveConfig(&j_obj); + std::string obj_doc; + Json::Dump(j_obj, &obj_doc); + extra_attr.emplace_back("objective", obj_doc); + } fo->Write(&mparam, sizeof(LearnerModelParamLegacy)); fo->Write(tparam_.objective); fo->Write(tparam_.booster); @@ -587,26 +601,7 @@ class LearnerImpl : public Learner { attr[kv.first] = kv.second; } fo->Write(std::vector>( - attr.begin(), attr.end())); - } - if (tparam_.objective == "count:poisson") { - auto it = cfg_.find("max_delta_step"); - if (it != cfg_.end()) { - fo->Write(it->second); - } else { - // recover value of max_delta_step from extra attributes - auto it2 = attributes_.find("count_poisson_max_delta_step"); - const std::string max_delta_step - = (it2 != attributes_.end()) ? it2->second : kMaxDeltaStepDefaultValue; - fo->Write(max_delta_step); - } - } - if (mparam.contain_eval_metrics != 0) { - std::vector metr; - for (auto& ev : metrics_) { - metr.emplace_back(ev->Name()); - } - fo->Write(metr); + attr.begin(), attr.end())); } } @@ -661,11 +656,13 @@ class LearnerImpl : public Learner { If you are loading a serialized model (like pickle in Python) generated by older XGBoost, please export the model by calling `Booster.save_model` from that version - first, then load it back in current version. See: + first, then load it back in current version. There's a simple script for helping + the process. See: https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html - for more details about differences between saving model and serializing. + for reference to the script, and more details about differences between saving model and + serializing. )doc"; int64_t sz {-1}; @@ -854,7 +851,8 @@ class LearnerImpl : public Learner { void ConfigureObjective(LearnerTrainParam const& old, Args* p_args) { // Once binary IO is gone, NONE of these config is useful. - if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0") { + if (cfg_.find("num_class") != cfg_.cend() && cfg_.at("num_class") != "0" && + tparam_.objective != "multi:softprob") { cfg_["num_output_group"] = cfg_["num_class"]; if (atoi(cfg_["num_class"].c_str()) > 1 && cfg_.count("objective") == 0) { tparam_.objective = "multi:softmax"; @@ -919,7 +917,6 @@ class LearnerImpl : public Learner { } CHECK_NE(mparam_.num_feature, 0) << "0 feature is supplied. Are you using raw Booster interface?"; - learner_model_param_.num_feature = mparam_.num_feature; // Remove these once binary IO is gone. 
cfg_["num_feature"] = common::ToString(mparam_.num_feature); cfg_["num_class"] = common::ToString(mparam_.num_class); diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 7f85010466d0..39b209c8e4b1 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -21,8 +21,9 @@ ENV GOSU_VERSION 1.10 # Install Python packages RUN \ - pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh recommonmark guzzle_sphinx_theme mock \ - breathe matplotlib graphviz pytest scikit-learn wheel kubernetes urllib3 jsonschema && \ + pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \ + recommonmark guzzle_sphinx_theme mock breathe matplotlib graphviz \ + pytest scikit-learn wheel kubernetes urllib3 jsonschema boto3 && \ pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \ pip install "dask[complete]" diff --git a/tests/python/generate_models.py b/tests/python/generate_models.py index 4f02566a58a6..6376d802e659 100644 --- a/tests/python/generate_models.py +++ b/tests/python/generate_models.py @@ -59,6 +59,29 @@ def generate_regression_model(): reg.save_model(skl_json('reg')) +def generate_logistic_model(): + print('Logistic') + y = np.random.randint(0, 2, size=kRows) + assert y.max() == 1 and y.min() == 0 + + data = xgboost.DMatrix(X, label=y, weight=w) + booster = xgboost.train({'tree_method': 'hist', + 'num_parallel_tree': kForests, + 'max_depth': kMaxDepth, + 'objective': 'binary:logistic'}, + num_boost_round=kRounds, dtrain=data) + booster.save_model(booster_bin('logit')) + booster.save_model(booster_json('logit')) + + reg = xgboost.XGBClassifier(tree_method='hist', + num_parallel_tree=kForests, + max_depth=kMaxDepth, + n_estimators=kRounds) + reg.fit(X, y, w) + reg.save_model(skl_bin('logit')) + reg.save_model(skl_json('logit')) + + def generate_classification_model(): print('Classification') y = np.random.randint(0, kClasses, size=kRows) @@ -83,7 +106,7 @@ def generate_classification_model(): def generate_ranking_model(): print('Learning to Rank') y = np.random.randint(5, size=kRows) - w = np.random.randn(20) + w = np.random.uniform(size=20) g = np.repeat(50, 20) data = xgboost.DMatrix(X, y, weight=w) @@ -119,6 +142,7 @@ def write_versions(): os.mkdir(target_dir) generate_regression_model() + generate_logistic_model() generate_classification_model() generate_ranking_model() write_versions() diff --git a/tests/python/models/version b/tests/python/models/version deleted file mode 100644 index 194fa40f4d25..000000000000 --- a/tests/python/models/version +++ /dev/null @@ -1 +0,0 @@ -{'numpy': '1.16.4', 'xgboost': '1.0.0-SNAPSHOT'} \ No newline at end of file diff --git a/tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.bin b/tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.bin deleted file mode 100644 index 81029ac7a6e80fbb2798c0ad4f5d506dd8704859..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4891 zcmZQzV6bOlU|?Vd;(-LbP|I^mb4oI;iu2P-auX}qpn~a1B}J*JJU|}fK)R8D-5|HI zBdGxe>p+A5Kmc+V6A**w-xI8DfK&r0x?%21Q%<)7av4~G7#8Ao=lKo*)i;3jg8)c= zeazoIATuB$5V3=aV%9*jQPdT)%-svJ4_V!_Df4Y=pISNS1SU8yuJPUtGEXgcgZ)m1 zr%t<#UxC$^-6TaNT6u58?>KZ?qSWy6zAAQ1yDdl#mS;ffIajIgM^ECBq$HZ!CpQr_%R<;mch$wP~0cxa)Qe{Sa}T+E0s642hu3%HczN!H%Jb- z9G08F3@Teu)Ghd>g;8D?rEa!rzb6SUudSai2A8*ABA6Yv1@buiG@k*N&z$dX+rRy{ z-05HXbFhBj`5q4QPPIC1m%FnEXe~pToTbCr87-7b>cb*m3W8;P844D>(nwFW79~vM$}x{wO=x 
zy%~>I+MAt`cNV=h7o30oAKPHh=%wwnY^x_Y{|trvG1B5=DAvPezpkUSXFzGyUhs)M zdb;Fb=#0_&kV+J9PIUHL=wXRU~+?(*Pt@(5~CDI z4Br0)rAIF>9gOli{%rw9|C8zMLyY!}+v|OxJ}F9hz1wiXp3FdLhhJ9{oDY^m`j2x2 zw%d1}<#uFw#{f$E42Kow+AB#$I2$P32B-UN(TDBko>6r;fAj=6-3GkaW_NhY5r;-s z8*n@(l~f3ci%s-!$PKS|%2X5Td>&FNSjE4Vx#3gz`kiVt8Eg8?XBKo}I~APfWO F;sB-Y&Ex<8 diff --git a/tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.json b/tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.json deleted file mode 100644 index e8bc8bceb675..000000000000 --- a/tests/python/models/xgboost-1.0.0-SNAPSHOT.cls.json +++ /dev/null @@ -1 +0,0 @@ -{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"12","size_leaf_vector":"0"},"tree_info":[0,0,1,1,2,2,0,0,1,1,2,2],"trees":[{"base_weights":[-3.10496310703456402e-03,-1.14356480538845062e-01,1.12878747284412384e-01,-2.06186547875404358e-01,1.52976317331194878e-02,-5.11124253273010254e-01,1.29867866635322571e-01],"default_left":[false,false,false,false,false,false,false],"id":0,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.92025685310363770e+00,1.37833094596862793e+00,1.24116408824920654e+00,1.33592224121093750e+00,1.26208007335662842e+00,0.00000000000000000e+00,1.38571357727050781e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.14481402561068535e-02,2.25720971822738647e-01,-2.23139905929565430e+00,-3.09279840439558029e-02,2.29464494623243809e-03,-7.66686424612998962e-02,1.94801799952983856e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.14501411437988281e+02,1.09815666198730469e+02,6.66821746826171875e+01,4.78192405700683594e+01,2.13970732688903809e+00,1.07675956726074219e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.10496310703456402e-03,-1.14356480538845062e-01,1.12878747284412384e-01,-2.06186547875404358e-01,1.52976317331194878e-02,-5.11124253273010254e-01,1.29867866635322571e-01],"default_left":[false,false,false,false,false,false,false],"id":1,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.92025685310363770e+00,1.37833094596862793e+00,1.24116408824920654e+00,1.33592224121093750e+00,1.26208007335662842e+00,0.00000000000000000e+00,1.38571357727050781e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.14481402561068535e-02,2.25720971822738647e-01,-2.23139905929565430e+00,-3.09279840439558029e-02,2.29464494623243809e-03,-7.66686424612998962e-02,1.94801799952983856e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.14501411437988281e+02,1.09815666198730469e+02,6.66821746826171875e+01,4.78192405700683594e+01,2.13970732688903809e+00,1.07675956726074219e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-1.69028975069522858e-02,8.05607438087463379e-01,-2.48735304921865463e-02,1.18516474962234497e-01,-7.35604763031005859e-02],"default_left":[false,false,false,false,false],"id":2,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.49720096588134766e+00,0.00000000000000000e+00,1.57338166236877441e+00,1.47135305404663086e+00,1.12515950202941895e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.20841123163700104e-01,-6.54938280582427979e-01,1.77774727344512939e-02,-1.10340714454650879e-02],"split_indices":[0,0,1,0,0],"sum_hessian":[2.24317077636718750e+02,1.192
45672225952148e+00,2.23124618530273438e+02,5.61932411193847656e+01,1.66931381225585938e+02],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[-1.69028975069522858e-02,8.05607438087463379e-01,-2.48735304921865463e-02,1.18516474962234497e-01,-7.35604763031005859e-02],"default_left":[false,false,false,false,false],"id":3,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.49720096588134766e+00,0.00000000000000000e+00,1.57338166236877441e+00,1.47135305404663086e+00,1.12515950202941895e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.20841123163700104e-01,-6.54938280582427979e-01,1.77774727344512939e-02,-1.10340714454650879e-02],"split_indices":[0,0,1,0,0],"sum_hessian":[2.24317077636718750e+02,1.19245672225952148e+00,2.23124618530273438e+02,5.61932411193847656e+01,1.66931381225585938e+02],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.00077202171087265e-02,9.98598039150238037e-02,-1.12991318106651306e-01,-2.68701147288084030e-02,2.14132413268089294e-01,-2.78286129236221313e-01,9.12456586956977844e-03],"default_left":[false,false,false,false,false,false,false],"id":4,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.40420699119567871e+00,2.05230426788330078e+00,1.71823227405548096e+00,3.13472294807434082e+00,2.89248418807983398e+00,9.98327255249023438e-01,1.37142074108123779e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[3.12443733215332031e-01,-5.36825239658355713e-01,6.99698746204376221e-01,-4.03051730245351791e-03,3.21198627352714539e-02,-4.17429208755493164e-02,1.36868492700159550e-03],"split_indices":[1,1,1,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.40319183349609375e+02,8.39979019165039062e+01,6.68957366943359375e+01,7.34234390258789062e+01,3.51459007263183594e+01,4.88520011901855469e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.00077202171087265e-02,9.98598039150238037e-02,-1.12991318106651306e-01,-2.68701147288084030e-02,2.14132413268089294e-01,-2.78286129236221313e-01,9.12456586956977844e-03],"default_left":[false,false,false,false,false,false,false],"id":5,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.40420699119567871e+00,2.05230426788330078e+00,1.71823227405548096e+00,3.13472294807434082e+00,2.89248418807983398e+00,9.98327255249023438e-01,1.37142074108123779e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[3.12443733215332031e-01,-5.36825239658355713e-01,6.99698746204376221e-01,-4.03051730245351791e-03,3.21198627352714539e-02,-4.17429208755493164e-02,1.36868492700159550e-03],"split_indices":[1,1,1,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.40319183349609375e+02,8.39979019165039062e+01,6.68957366943359375e+01,7.34234390258789062e+01,3.51459007263183594e+01,4.88520011901855469e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.14401555806398392e-03,-2.77371466159820557e-01,2.71770134568214417e-02,-5.22169768810272217e-01,-1.32588371634483337e-01,-1.92690286785364151e-02,1.96981996297836304e-01],"default_left":[false,false,false,false,false,false,false],"id":6,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.87940013408660889e+00,7.28317379951477051e-01,1.60969936847686768e+00,3.83
746862411499023e-01,1.40333545207977295e+00,1.32722187042236328e+00,1.38107287883758545e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-1.36123311519622803e+00,-3.55613559484481812e-01,8.09594929218292236e-01,-7.83254653215408325e-02,-1.98882557451725006e-02,-2.89035448804497719e-03,2.95473001897335052e-02],"split_indices":[1,2,0,0,0,0,0],"sum_hessian":[2.24122543334960938e+02,2.15025730133056641e+01,2.02619964599609375e+02,7.02246618270874023e+00,1.44801063537597656e+01,1.59797698974609375e+02,4.28222694396972656e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.14401555806398392e-03,-2.77371466159820557e-01,2.71770134568214417e-02,-5.22169768810272217e-01,-1.32588371634483337e-01,-1.92690286785364151e-02,1.96981996297836304e-01],"default_left":[false,false,false,false,false,false,false],"id":7,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.87940013408660889e+00,7.28317379951477051e-01,1.60969936847686768e+00,3.83746862411499023e-01,1.40333545207977295e+00,1.32722187042236328e+00,1.38107287883758545e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-1.36123311519622803e+00,-3.55613559484481812e-01,8.09594929218292236e-01,-7.83254653215408325e-02,-1.98882557451725006e-02,-2.89035448804497719e-03,2.95473001897335052e-02],"split_indices":[1,2,0,0,0,0,0],"sum_hessian":[2.24122543334960938e+02,2.15025730133056641e+01,2.02619964599609375e+02,7.02246618270874023e+00,1.44801063537597656e+01,1.59797698974609375e+02,4.28222694396972656e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-1.22269820421934128e-02,7.03054547309875488e-01,-1.94818396121263504e-02,-3.24203968048095703e-02,3.11440438032150269e-01],"default_left":[false,false,false,false,false],"id":8,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.17855262756347656e+00,0.00000000000000000e+00,9.65734302997589111e-01,1.15091991424560547e+00,1.31937885284423828e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.05458185076713562e-01,1.78177630901336670e+00,-4.86305961385369301e-03,4.67160679399967194e-02],"split_indices":[0,0,2,0,0],"sum_hessian":[2.23474166870117188e+02,1.28086733818054199e+00,2.22193298339843750e+02,2.14700851440429688e+02,7.49244642257690430e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[-1.22269820421934128e-02,7.03054547309875488e-01,-1.94818396121263504e-02,-3.24203968048095703e-02,3.11440438032150269e-01],"default_left":[false,false,false,false,false],"id":9,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.17855262756347656e+00,0.00000000000000000e+00,9.65734302997589111e-01,1.15091991424560547e+00,1.31937885284423828e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.05458185076713562e-01,1.78177630901336670e+00,-4.86305961385369301e-03,4.67160679399967194e-02],"split_indices":[0,0,2,0,0],"sum_hessian":[2.23474166870117188e+02,1.28086733818054199e+00,2.22193298339843750e+02,2.14700851440429688e+02,7.49244642257690430e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[1.52873406186699867e-02,5.03631830215454102e-01,-6.68718665838241577e-03,-9.80335399508476257e-02,7.35435545444488525e-01,3.88425774872303009e-02
,-1.30366414785385132e-01],"default_left":[false,false,false,false,false,false,false],"id":10,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.42684388160705566e+00,1.43019437789916992e+00,1.22756230831146240e+00,3.81559073925018311e-01,3.57241153717041016e-01,1.24453198909759521e+00,1.17859208583831787e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.00600242614746094e+00,-2.40636110305786133e+00,6.73008561134338379e-01,-1.47050311788916588e-02,1.10315337777137756e-01,5.82638662308454514e-03,-1.95549633353948593e-02],"split_indices":[1,1,2,0,0,0,0],"sum_hessian":[2.24835845947265625e+02,8.73767948150634766e+00,2.16098175048828125e+02,2.59061360359191895e+00,6.14706563949584961e+00,1.58453109741210938e+02,5.76450576782226562e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[1.52873406186699867e-02,5.03631830215454102e-01,-6.68718665838241577e-03,-9.80335399508476257e-02,7.35435545444488525e-01,3.88425774872303009e-02,-1.30366414785385132e-01],"default_left":[false,false,false,false,false,false,false],"id":11,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.42684388160705566e+00,1.43019437789916992e+00,1.22756230831146240e+00,3.81559073925018311e-01,3.57241153717041016e-01,1.24453198909759521e+00,1.17859208583831787e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.00600242614746094e+00,-2.40636110305786133e+00,6.73008561134338379e-01,-1.47050311788916588e-02,1.10315337777137756e-01,5.82638662308454514e-03,-1.95549633353948593e-02],"split_indices":[1,1,2,0,0,0,0],"sum_hessian":[2.24835845947265625e+02,8.73767948150634766e+00,2.16098175048828125e+02,2.59061360359191895e+00,6.14706563949584961e+00,1.58453109741210938e+02,5.76450576782226562e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"3","num_feature":"4"},"objective":{"name":"multi:softmax","softmax_multiclass_param":{"num_class":"3"}}},"version":[1,0,0]} \ No newline at end of file diff --git a/tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.bin b/tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.bin deleted file mode 100644 index c99505b854146d882bece5fee9cdc2f21cb28f05..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1799 zcmZQzV6bOl7+DPR6)z6#1DShMZ?D}|hUJbi3#@EG`j3l9+Hd~Z>iD|- zsVzu8fl1cBOQywvThR@yel#4Z7mn{mS5@0c$uZwRt4 zXU0=IriD`-vvxn*3sQfi_m|yNzLgH|pKP%M@j;;Y9=}88>*wHf>KWf-XR@@}k!!U+ pIRAj;^HrDFckKEM=UcKiI2cR(w*$$6+&UT`3=D%MK0tAT3;_SxINksN diff --git a/tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.json b/tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.json deleted file mode 100644 index a19374c3400d..000000000000 --- a/tests/python/models/xgboost-1.0.0-SNAPSHOT.ltr.json +++ /dev/null @@ -1 +0,0 @@ 
-{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":0,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":1,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.320
68562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"lambda_rank_param":{"fix_list_weight":"0","num_pairsample":"1"},"name":"rank:ndcg"}},"version":[1,0,0]} \ No newline at end of file diff --git a/tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.bin b/tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.bin deleted file mode 100644 index 7baeaf373a23847ea474c5d72aed5a2f7bbd30f7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1950 zcmZQzV6bOl7a#uX<|`oN@`J2ei0i~JUywTC^Z%2QpSOIEKV1I+{TV1 zI~YRtKM;W2#RSA4+PuNZ4oEdH0|}V>_B?&DAIN231!7o;zuLj>094-q(hmY4`NYEx z`#@$uL?B}PR#kg}W}~Rn&bbT{Ls1ucvCo#_=6Q$U=s(UHi#fseDX3p`sCY5K*=WQ4 zeIWG?$&(zmJ`Hn@+NZrA#AmpbZI}C;%kjdZ1z`0b4xe$bo!H~Nz4gLAko1LDJq~WX zAqB=h@5HP<^0{j>(EDA u5MO3SyF)=@nlszanPBrK1Wj|;W!>U@{rLwlALQ1-oIgfde1PH%82|v;MQ#cJ diff --git a/tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.json b/tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.json deleted file mode 100644 index b6890cea034f..000000000000 --- a/tests/python/models/xgboost-1.0.0-SNAPSHOT.reg.json +++ /dev/null @@ -1 +0,0 @@ -{"learner":{"attributes":{},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[-5.37645816802978516e-01,-4.36891138553619385e-01,-6.70873284339904785e-01,-1.25496864318847656e+00,-4.07270163297653198e-01,-6.88224375247955322e-01,4.64901357889175415e-01],"default_left":[false,false,false,false,false,false,false],"id":0,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[6.49523925781250000e+00,6.53602600097656250e+00,4.57461547851562500e+00,2.30323791503906250e-01,6.39891815185546875e+00,4.40366363525390625e+00,2.28362298011779785e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.89942225813865662e-01,-1.81951093673706055e+00,2.12066125869750977e+00,-1.88245311379432678e-01,-6.10905252397060394e-02,-1.03233657777309418e-01,6.97352066636085510e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,2.89816162109375000e+02,2.14897293090820312e+02,8.68150043487548828e+00,2.81134674072265625e+02,2.12051849365234375e+02,2.84543561935424805e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-5.37645816802978516e-01,-4.36891138553619385e-01,-6.70873284339904785e-01,-1.25496864318847656e+00,-4.07270163297653198e-01,-6.88224375247955322e-01,4.64901357889175415e-01],"default_left":[false,false,false,false,false,false,false],"id":1,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[6.49523925781250000e+00,6.53602600097656250e+00,4.57461547851562500e+00,2.30323791503906250e-01,6.39891815185546875e+00,4.40366363525390625e+00,2.28362298011779785e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.89942225813865662e-01,-1.81951093673706055e+00,2.12066125869750977e+00,-1.88245311379432678e-01,-6.10905252397060394e-02,-1.03233657777309418e-01,6.97352066636085510e-02],"split_indices
":[1,0,0,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,2.89816162109375000e+02,2.14897293090820312e+02,8.68150043487548828e+00,2.81134674072265625e+02,2.12051849365234375e+02,2.84543561935424805e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.77470612525939941e-01,3.31088960170745850e-01,-3.92237067222595215e-01,8.17872881889343262e-01,1.18046358227729797e-01,-3.00728023052215576e-01,-4.70518797636032104e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[5.42109680175781250e+00,1.03034389019012451e+00,3.41049194335937500e+00,0.00000000000000000e+00,1.19803142547607422e+00,4.23731803894042969e+00,4.69757843017578125e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.07929229736328125e+00,-5.09094715118408203e-01,-8.72411578893661499e-02,1.22680939733982086e-01,1.77069548517465591e-02,-4.51092049479484558e-02,-7.05778226256370544e-02],"split_indices":[3,0,3,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+02,2.13924217224121094e+00,7.72699451446533203e+00,2.30380615234375000e+02,2.64466613769531250e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.77470612525939941e-01,3.31088960170745850e-01,-3.92237067222595215e-01,8.17872881889343262e-01,1.18046358227729797e-01,-3.00728023052215576e-01,-4.70518797636032104e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[5.42109680175781250e+00,1.03034389019012451e+00,3.41049194335937500e+00,0.00000000000000000e+00,1.19803142547607422e+00,4.23731803894042969e+00,4.69757843017578125e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.07929229736328125e+00,-5.09094715118408203e-01,-8.72411578893661499e-02,1.22680939733982086e-01,1.77069548517465591e-02,-4.51092049479484558e-02,-7.05778226256370544e-02],"split_indices":[3,0,3,0,0,0,0],"sum_hessian":[5.04713470458984375e+02,9.86623668670654297e+00,4.94847229003906250e+02,2.13924217224121094e+00,7.72699451446533203e+00,2.30380615234375000e+02,2.64466613769531250e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"name":"reg:squarederror","reg_loss_param":{"scale_pos_weight":"1"}}},"version":[1,0,0]} \ No newline at end of file diff --git a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.bin b/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.bin deleted file mode 100644 index 342dde2a919950eaa0c01806afc6ce9044859dc3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5641 zcmeHLeQXp(6kqxw7LXtb5=zp$oDmak6lgW^xOT3)0kP6*gPH>3bnb3@x3C|V-MuTN zU~7aBF_Q9O!ymR+ia{j=P_$rLpxBmx6fu6J3Z$Vb)`)xrP0&`U^QL$0UXR2-{G)B? 
zOD1#k=FNL=-}}9{v&V59O5r#znT`o`j1*0c8dgHGDLDEnOJ27H@n5rVjmH1$&gzmn*HxvzSGi%wOL@0fq35BhWIl1rr{7V+G3Q(LHf6`Tix z{R1!m(+oNWI0L*B=A-5pk`*>~v=ql?c1nJ=+BO<|qfzr`%<} zDeG4zU)EKJBD*f|e`WUM!1=2Fy=c+Ojr>-}(qkas`Q|Rvvk3858rH%yFuHZ)UiYN$ zla9k;8vYQ?T;9pg_&bF?5BNHs4~`p6`2dW?D2Bh-dL(ARDBuL}1~jwai55D=;rXqK zTRE^7s~M0_+xzhG;kcjHu+=pj_wG~K&5PeXjsW*J%Qmz60bO##PL%H|MDTl??=SXAxadP#&k0kVKT|DI6)HRFs>jgc# zS-<_GIN9A;G(}kQdI#&zY1c2HtGD0dZ~J@L`GUw|cirb3`NJ6(o2gkYobi4J0t8q zQ(Uwitv>XU;69VW?k|Dv_mSuy_!z1zm@eP%SqakRfbi#=Qmh1+1ihMxA-gmmBy zet!ZU^Yb$k`Zc~9O!)pxy4sy^pILO}7<`k)_3Ng2_03YzYgnjg3WthC!;}hqCUiczs0a3ngPv6nT~*Dm|($A6om{)DUyo4 z0ZI1ZGEwpeOv?^)MK%ehdV@oJBjhoN5|l0Ocr|%|g+1k9c1YN|Ba2~CzVnu<`7DQY z(T|B73|QO*mP{0n&%!gjL>6&SGZI|)5JSX9nr;%+D`K-e2tLy+Nh-@$FoY$@U(}LUCWavzm~vLk&9u+Uw9j7&4}&cm uS(HJMNJT0f7WSY#JQpq%i5b$VCa)G!A?w+c8?#>gG%tW5sZ^qfTK_MSFT<$- diff --git a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.json b/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.json deleted file mode 100644 index 495226599986..000000000000 --- a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.cls.json +++ /dev/null @@ -1 +0,0 @@ -{"learner":{"attributes":{"scikit_learn":"{\"n_estimators\": 2, \"objective\": \"multi:softprob\", \"max_depth\": 2, \"learning_rate\": null, \"verbosity\": null, \"booster\": null, \"tree_method\": \"hist\", \"gamma\": null, \"min_child_weight\": null, \"max_delta_step\": null, \"subsample\": null, \"colsample_bytree\": null, \"colsample_bylevel\": null, \"colsample_bynode\": null, \"reg_alpha\": null, \"reg_lambda\": null, \"scale_pos_weight\": null, \"base_score\": null, \"missing\": NaN, \"num_parallel_tree\": 2, \"kwargs\": {}, \"random_state\": null, \"n_jobs\": null, \"monotone_constraints\": null, \"interaction_constraints\": null, \"importance_type\": \"gain\", \"gpu_id\": null, \"classes_\": [0, 1, 2], \"n_classes_\": 3, \"_le\": {\"classes_\": [0, 1, 2]}, \"_features_count\": 4, \"type\": 
\"XGBClassifier\"}"},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"12","size_leaf_vector":"0"},"tree_info":[0,0,1,1,2,2,0,0,1,1,2,2],"trees":[{"base_weights":[-3.10496310703456402e-03,-1.14356480538845062e-01,1.12878747284412384e-01,-2.06186547875404358e-01,1.52976317331194878e-02,-5.11124253273010254e-01,1.29867866635322571e-01],"default_left":[false,false,false,false,false,false,false],"id":0,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.92025685310363770e+00,1.37833094596862793e+00,1.24116408824920654e+00,1.33592224121093750e+00,1.26208007335662842e+00,0.00000000000000000e+00,1.38571357727050781e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.14481402561068535e-02,2.25720971822738647e-01,-2.23139905929565430e+00,-3.09279840439558029e-02,2.29464494623243809e-03,-7.66686424612998962e-02,1.94801799952983856e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.14501411437988281e+02,1.09815666198730469e+02,6.66821746826171875e+01,4.78192405700683594e+01,2.13970732688903809e+00,1.07675956726074219e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.10496310703456402e-03,-1.14356480538845062e-01,1.12878747284412384e-01,-2.06186547875404358e-01,1.52976317331194878e-02,-5.11124253273010254e-01,1.29867866635322571e-01],"default_left":[false,false,false,false,false,false,false],"id":1,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.92025685310363770e+00,1.37833094596862793e+00,1.24116408824920654e+00,1.33592224121093750e+00,1.26208007335662842e+00,0.00000000000000000e+00,1.38571357727050781e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[1.14481402561068535e-02,2.25720971822738647e-01,-2.23139905929565430e+00,-3.09279840439558029e-02,2.29464494623243809e-03,-7.66686424612998962e-02,1.94801799952983856e-02],"split_indices":[1,0,0,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.14501411437988281e+02,1.09815666198730469e+02,6.66821746826171875e+01,4.78192405700683594e+01,2.13970732688903809e+00,1.07675956726074219e+02],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-1.69028975069522858e-02,8.05607438087463379e-01,-2.48735304921865463e-02,1.18516474962234497e-01,-7.35604763031005859e-02],"default_left":[false,false,false,false,false],"id":2,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.49720096588134766e+00,0.00000000000000000e+00,1.57338166236877441e+00,1.47135305404663086e+00,1.12515950202941895e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.20841123163700104e-01,-6.54938280582427979e-01,1.77774727344512939e-02,-1.10340714454650879e-02],"split_indices":[0,0,1,0,0],"sum_hessian":[2.24317077636718750e+02,1.19245672225952148e+00,2.23124618530273438e+02,5.61932411193847656e+01,1.66931381225585938e+02],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[-1.69028975069522858e-02,8.05607438087463379e-01,-2.48735304921865463e-02,1.18516474962234497e-01,-7.35604763031005859e-02],"default_left":[false,false,false,false,false],"id":3,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.49720096588134766e+00,0.00000000000000000e+00,1.57338166236877441e+00,1.47135305404663086e+00,1.12515950202941895e+00],"parents":[2147483647,0,0,2,2
],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.20841123163700104e-01,-6.54938280582427979e-01,1.77774727344512939e-02,-1.10340714454650879e-02],"split_indices":[0,0,1,0,0],"sum_hessian":[2.24317077636718750e+02,1.19245672225952148e+00,2.23124618530273438e+02,5.61932411193847656e+01,1.66931381225585938e+02],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.00077202171087265e-02,9.98598039150238037e-02,-1.12991318106651306e-01,-2.68701147288084030e-02,2.14132413268089294e-01,-2.78286129236221313e-01,9.12456586956977844e-03],"default_left":[false,false,false,false,false,false,false],"id":4,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.40420699119567871e+00,2.05230426788330078e+00,1.71823227405548096e+00,3.13472294807434082e+00,2.89248418807983398e+00,9.98327255249023438e-01,1.37142074108123779e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[3.12443733215332031e-01,-5.36825239658355713e-01,6.99698746204376221e-01,-4.03051730245351791e-03,3.21198627352714539e-02,-4.17429208755493164e-02,1.36868492700159550e-03],"split_indices":[1,1,1,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.40319183349609375e+02,8.39979019165039062e+01,6.68957366943359375e+01,7.34234390258789062e+01,3.51459007263183594e+01,4.88520011901855469e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.00077202171087265e-02,9.98598039150238037e-02,-1.12991318106651306e-01,-2.68701147288084030e-02,2.14132413268089294e-01,-2.78286129236221313e-01,9.12456586956977844e-03],"default_left":[false,false,false,false,false,false,false],"id":5,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.40420699119567871e+00,2.05230426788330078e+00,1.71823227405548096e+00,3.13472294807434082e+00,2.89248418807983398e+00,9.98327255249023438e-01,1.37142074108123779e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[3.12443733215332031e-01,-5.36825239658355713e-01,6.99698746204376221e-01,-4.03051730245351791e-03,3.21198627352714539e-02,-4.17429208755493164e-02,1.36868492700159550e-03],"split_indices":[1,1,1,0,0,0,0],"sum_hessian":[2.24317077636718750e+02,1.40319183349609375e+02,8.39979019165039062e+01,6.68957366943359375e+01,7.34234390258789062e+01,3.51459007263183594e+01,4.88520011901855469e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.14401555806398392e-03,-2.77371466159820557e-01,2.71770134568214417e-02,-5.22169768810272217e-01,-1.32588371634483337e-01,-1.92690286785364151e-02,1.96981996297836304e-01],"default_left":[false,false,false,false,false,false,false],"id":6,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.87940013408660889e+00,7.28317379951477051e-01,1.60969936847686768e+00,3.83746862411499023e-01,1.40333545207977295e+00,1.32722187042236328e+00,1.38107287883758545e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-1.36123311519622803e+00,-3.55613559484481812e-01,8.09594929218292236e-01,-7.83254653215408325e-02,-1.98882557451725006e-02,-2.89035448804497719e-03,2.95473001897335052e-02],"split_indices":[1,2,0,0,0,0,0],"sum_hessian":[2.24122543334960938e+02,2.15025730133056641e+01,2.02619964599609375e+02,7.02246618270874023e+00,1.44801063537597656e+01,1.59797698974609375e+02,4.28222694396972656e+01],"tree_param":{
"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-3.14401555806398392e-03,-2.77371466159820557e-01,2.71770134568214417e-02,-5.22169768810272217e-01,-1.32588371634483337e-01,-1.92690286785364151e-02,1.96981996297836304e-01],"default_left":[false,false,false,false,false,false,false],"id":7,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.87940013408660889e+00,7.28317379951477051e-01,1.60969936847686768e+00,3.83746862411499023e-01,1.40333545207977295e+00,1.32722187042236328e+00,1.38107287883758545e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-1.36123311519622803e+00,-3.55613559484481812e-01,8.09594929218292236e-01,-7.83254653215408325e-02,-1.98882557451725006e-02,-2.89035448804497719e-03,2.95473001897335052e-02],"split_indices":[1,2,0,0,0,0,0],"sum_hessian":[2.24122543334960938e+02,2.15025730133056641e+01,2.02619964599609375e+02,7.02246618270874023e+00,1.44801063537597656e+01,1.59797698974609375e+02,4.28222694396972656e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[-1.22269820421934128e-02,7.03054547309875488e-01,-1.94818396121263504e-02,-3.24203968048095703e-02,3.11440438032150269e-01],"default_left":[false,false,false,false,false],"id":8,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.17855262756347656e+00,0.00000000000000000e+00,9.65734302997589111e-01,1.15091991424560547e+00,1.31937885284423828e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.05458185076713562e-01,1.78177630901336670e+00,-4.86305961385369301e-03,4.67160679399967194e-02],"split_indices":[0,0,2,0,0],"sum_hessian":[2.23474166870117188e+02,1.28086733818054199e+00,2.22193298339843750e+02,2.14700851440429688e+02,7.49244642257690430e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[-1.22269820421934128e-02,7.03054547309875488e-01,-1.94818396121263504e-02,-3.24203968048095703e-02,3.11440438032150269e-01],"default_left":[false,false,false,false,false],"id":9,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[1.17855262756347656e+00,0.00000000000000000e+00,9.65734302997589111e-01,1.15091991424560547e+00,1.31937885284423828e+00],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-2.83283019065856934e+00,1.05458185076713562e-01,1.78177630901336670e+00,-4.86305961385369301e-03,4.67160679399967194e-02],"split_indices":[0,0,2,0,0],"sum_hessian":[2.23474166870117188e+02,1.28086733818054199e+00,2.22193298339843750e+02,2.14700851440429688e+02,7.49244642257690430e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[1.52873406186699867e-02,5.03631830215454102e-01,-6.68718665838241577e-03,-9.80335399508476257e-02,7.35435545444488525e-01,3.88425774872303009e-02,-1.30366414785385132e-01],"default_left":[false,false,false,false,false,false,false],"id":10,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.42684388160705566e+00,1.43019437789916992e+00,1.22756230831146240e+00,3.81559073925018311e-01,3.57241153717041016e-01,1.24453198909759521e+00,1.17859208583831787e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.00600242614746094e+00,-2.40636110305786133e+00,6.73008561134338379e-01,-1.47050311788916588e-02,1.10315337777137756e-01,5.82638662308454514e-03
,-1.95549633353948593e-02],"split_indices":[1,1,2,0,0,0,0],"sum_hessian":[2.24835845947265625e+02,8.73767948150634766e+00,2.16098175048828125e+02,2.59061360359191895e+00,6.14706563949584961e+00,1.58453109741210938e+02,5.76450576782226562e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[1.52873406186699867e-02,5.03631830215454102e-01,-6.68718665838241577e-03,-9.80335399508476257e-02,7.35435545444488525e-01,3.88425774872303009e-02,-1.30366414785385132e-01],"default_left":[false,false,false,false,false,false,false],"id":11,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[2.42684388160705566e+00,1.43019437789916992e+00,1.22756230831146240e+00,3.81559073925018311e-01,3.57241153717041016e-01,1.24453198909759521e+00,1.17859208583831787e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[-2.00600242614746094e+00,-2.40636110305786133e+00,6.73008561134338379e-01,-1.47050311788916588e-02,1.10315337777137756e-01,5.82638662308454514e-03,-1.95549633353948593e-02],"split_indices":[1,1,2,0,0,0,0],"sum_hessian":[2.24835845947265625e+02,8.73767948150634766e+00,2.16098175048828125e+02,2.59061360359191895e+00,6.14706563949584961e+00,1.58453109741210938e+02,5.76450576782226562e+01],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"3","num_feature":"4"},"objective":{"name":"multi:softprob","softmax_multiclass_param":{"num_class":"3"}}},"version":[1,0,0]} \ No newline at end of file diff --git a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.bin b/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.bin deleted file mode 100644 index 8d7ff40116e1da353faeff931f7c9bcef65e154e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2445 zcmeHJO-L0{6rN0relUWFf+TxTi-^JsBGH-mf{HdSTD7QdcV=$$uHLzKn0wz-5kW}% zZzBjr5C%oRiy)y;pB4nIY9U0pX{8lNw5S%fv3ust^Nl97iFDzC;mkefJ3pN7%#UFh zR;5vtWwBlewJI(eV%}2EUB_Dy8$4Tugg}c6+1}qq0=-owK&6o;O#l6MS_|}5p|9oo zdU9)URI4tqgXnwXRHs(}4gS)MA0Iw#>6lr#@6RqZK%Qb`F*ft^%dkBE-t5~skQqO) zv983=kDFsw+vgV}kav7{!F*@j%be`5Z-n^Q8#Y@{fA(aiE=)Fp|4c=Vb$Ls7dd;>Y z5q@co|89;eVZgC0wtWPi^CRmmt-6xC*WWt|I~qMpr8mGt<--w;pS6$UJFXNUM{*z2 zZ^nk^?(Or2-r>1>+f#j`@$K1e>;A-OM$~MrgL>@xq*-yhFVj5qu>km2*M6CO)eq8N zCk9M7gUAo-(sb%mw5P|=RdfH{bD7nT_C(JQ_{H`+*2TeZGv|X`pB~M3=Wh)(4n+U} diff --git a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.json b/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.json deleted file mode 100644 index 312768898d82..000000000000 --- a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.ltr.json +++ /dev/null @@ -1 +0,0 @@ -{"learner":{"attributes":{"scikit_learn":"{\"n_estimators\": 2, \"objective\": \"rank:ndcg\", \"max_depth\": 2, \"learning_rate\": null, \"verbosity\": null, \"booster\": null, \"tree_method\": \"hist\", \"gamma\": null, \"min_child_weight\": null, \"max_delta_step\": null, \"subsample\": null, \"colsample_bytree\": null, \"colsample_bylevel\": null, \"colsample_bynode\": null, \"reg_alpha\": null, \"reg_lambda\": null, \"scale_pos_weight\": null, \"base_score\": null, \"missing\": NaN, \"num_parallel_tree\": 2, \"kwargs\": {}, \"random_state\": null, \"n_jobs\": null, \"monotone_constraints\": null, \"interaction_constraints\": null, \"importance_type\": \"gain\", \"gpu_id\": null, \"type\": 
\"XGBRanker\"}"},"gradient_booster":{"model":{"gbtree_model_param":{"num_trees":"4","size_leaf_vector":"0"},"tree_info":[0,0,0,0],"trees":[{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":0,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.18596185597164094e-09,-3.76773595809936523e-01,4.55630868673324585e-02,1.12075649201869965e-01,-1.93485423922538757e-01],"default_left":[false,false,false,false,false],"id":1,"leaf_child_counts":[1,0,2,0,0],"left_children":[1,-1,3,-1,-1],"loss_changes":[4.20947641134262085e-01,0.00000000000000000e+00,3.69498044252395630e-01,5.97973287105560303e-01,6.13317489624023438e-01],"parents":[2147483647,0,0,2,2],"right_children":[2,-1,4,-1,-1],"split_conditions":[-1.45796775817871094e+00,-5.65160401165485382e-02,8.68250608444213867e-01,1.68113484978675842e-02,-2.90228147059679031e-02],"split_indices":[3,0,1,0,0],"sum_hessian":[2.25207920074462891e+01,1.64538443088531494e+00,2.08754062652587891e+01,1.67469234466552734e+01,4.12848377227783203e+00],"tree_param":{"num_feature":"4","num_nodes":"5","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":2,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.32068562507629395e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}},{"base_weights":[2.31542762740843955e-09,-1.12662151455879211e-01,3.53309124708175659e-01,-4.52967911958694458e-01,-4.28877249360084534e-02,-1.19008123874664307e-01,4.98231500387191772e-01],"default_left":[false,false,false,false,false,false,false],"id":3,"leaf_child_counts":[0,2,2,0,0,0,0],"left_children":[1,3,5,-1,-1,-1,-1],"loss_changes":[1.03438735008239746e+00,4.48428511619567871e-01,4.89362835884094238e-01,0.00000000000000000e+00,2.74164468050003052e-01,0.00000000000000000e+00,0.00000000000000000e+00],"parents":[2147483647,0,0,1,1,2,2],"right_children":[2,4,6,-1,-1,-1,-1],"split_conditions":[5.69312453269958496e-01,-1.49666213989257812e+00,-3.320685625076293
95e-01,-6.79451897740364075e-02,-6.43315911293029785e-03,-1.78512185811996460e-02,7.47347250580787659e-02],"split_indices":[1,1,0,0,0,0,0],"sum_hessian":[2.39866485595703125e+01,1.87036170959472656e+01,5.28303003311157227e+00,2.24795222282409668e+00,1.64556655883789062e+01,1.28239238262176514e+00,4.00063753128051758e+00],"tree_param":{"num_feature":"4","num_nodes":"7","size_leaf_vector":"0"}}]},"name":"gbtree"},"learner_model_param":{"base_score":"0.500000","num_class":"0","num_feature":"4"},"objective":{"lambda_rank_param":{"fix_list_weight":"0","num_pairsample":"1"},"name":"rank:ndcg"}},"version":[1,0,0]} \ No newline at end of file diff --git a/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.reg.bin b/tests/python/models/xgboost_scikit-1.0.0-SNAPSHOT.reg.bin deleted file mode 100644 index 1be34798dbb26e86dd5516d92ad321c7e1cd933b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2606 zcmeHIT}TvB6rO(kDf**c0xOnJdk|?NqW_Q`~2YSmOFU*`d-}&x2XU@52 zM$@$N#aenT$jG@6t1>}3iIs{o3Cu~9aL#ys29KGN69SpeWtM-c4CGc2AZ05Xfd2a9 zFSQ`Ah3W#iD^*#hL~4<`W%GM7GS;tDEl*w9ijO}k&=o%g`(c3{59od1BaNi7m8EBU zmG3FMVmb`i6yC9+D-rG8YyIfSpT?G3OZ<4D%|m+gSf^2P?_(eEb?d7BVC1+_*H_#R zy7s=I%o$x;F?h4b=T8j0(j#3R#>4Z2ePA9O@6h)zuQ8I_clh>Cn^x;LigxOk3%{UNuDs~O`ii#p+hs5hseN@ir<2B^;g|lr zC`haQQmAh@b)+1g*PiGuzi3SA2cLh7fWGef1>M7^jr{N3zQ4}uYx?6o?Z(^D315d? z|K<7k-}(VP3ohsntYvAT3CnJXE(Vl8rY-?gD~~tfiwZ4*fFC4Ojwu_9RcRc%QRb)$Oew#M>X`|gOgkLrGRa(mEanQyvF*x$ zRcVA{)pn*zJuAWS7?;?!2$D$;a#mfkUB6pi0@={NOr)&6_CRGdiE$zX Date: Tue, 11 Feb 2020 09:54:31 +0800 Subject: [PATCH 02/10] Remove redundant test branch. --- tests/python/test_model_compatibility.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/python/test_model_compatibility.py b/tests/python/test_model_compatibility.py index 651ae42ad629..3ab85c74be8a 100644 --- a/tests/python/test_model_compatibility.py +++ b/tests/python/test_model_compatibility.py @@ -34,12 +34,8 @@ def run_booster_check(booster, name): assert len(booster.get_dump()) == gm.kForests * gm.kRounds assert float( config['learner']['learner_model_param']['base_score']) == 0.5 - if name.find('0.90') != -1: - assert config['learner']['learner_train_param'][ - 'objective'] == 'reg:squarederror' - else: - assert config['learner']['learner_train_param'][ - 'objective'] == 'reg:squarederror' + assert config['learner']['learner_train_param'][ + 'objective'] == 'reg:squarederror' def run_scikit_model_check(name, path): From abe6c318359c263a88ec3ab8298d7bff7fb198b2 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 11 Feb 2020 09:56:48 +0800 Subject: [PATCH 03/10] Remove formatting. 
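The removed snapshots above all share one JSON layout: a top-level "version" triple plus a "learner" object holding the trees, the learner_model_param strings and the objective. A minimal sketch for inspecting such a file, assuming a model saved locally as model.json:

    import json

    # 'model.json' is a placeholder for any file produced by Booster.save_model().
    with open('model.json', 'r') as fd:
        model = json.load(fd)

    print(model['version'])                    # e.g. [1, 0, 0]
    learner = model['learner']
    print(learner['learner_model_param'])      # base_score, num_class, num_feature as strings
    print(learner['objective']['name'])        # e.g. 'reg:squarederror' or 'rank:ndcg'
    print(len(learner['gradient_booster']['model']['trees']))  # total number of trees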
--- python-package/xgboost/core.py | 99 +++++++++++++--------------------- 1 file changed, 38 insertions(+), 61 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index b7baf22ad76d..f134c0399999 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -896,12 +896,11 @@ def slice(self, rindex, allow_groups=False): res = DMatrix(None, feature_names=self.feature_names, feature_types=self.feature_types) res.handle = ctypes.c_void_p() - _check_call(_LIB.XGDMatrixSliceDMatrixEx( - self.handle, - c_array(ctypes.c_int, rindex), - c_bst_ulong(len(rindex)), - ctypes.byref(res.handle), - ctypes.c_int(1 if allow_groups else 0))) + _check_call(_LIB.XGDMatrixSliceDMatrixEx(self.handle, + c_array(ctypes.c_int, rindex), + c_bst_ulong(len(rindex)), + ctypes.byref(res.handle), + ctypes.c_int(1 if allow_groups else 0))) return res @property @@ -955,8 +954,7 @@ def feature_names(self, feature_names): if not all(isinstance(f, STRING_TYPES) and not any(x in f for x in set(('[', ']', '<'))) for f in feature_names): - raise ValueError('feature_names must be string, and may not ' - 'contain [, ] or <') + raise ValueError('feature_names must be string, and may not contain [, ] or <') else: # reset feature_types also self.feature_types = None @@ -998,8 +996,7 @@ def feature_types(self, feature_types): valid = ('int', 'float', 'i', 'q') if not all(isinstance(f, STRING_TYPES) and f in valid for f in feature_types): - raise ValueError( - 'All feature_names must be {int, float, i, q}') + raise ValueError('All feature_names must be {int, float, i, q}') self._feature_types = feature_types @@ -1027,8 +1024,7 @@ def __init__(self, params=None, cache=(), model_file=None): """ for d in cache: if not isinstance(d, DMatrix): - raise TypeError('invalid cache item: {}'.format( - type(d).__name__), cache) + raise TypeError('invalid cache item: {}'.format(type(d).__name__), cache) self._validate_features(d) dmats = c_array(ctypes.c_void_p, [d.handle for d in cache]) @@ -1037,7 +1033,7 @@ def __init__(self, params=None, cache=(), model_file=None): ctypes.byref(self.handle))) if isinstance(params, dict) and \ - 'validate_parameters' not in params.keys(): + 'validate_parameters' not in params.keys(): params['validate_parameters'] = 1 self.set_param(params or {}) if (params is not None) and ('booster' in params): @@ -1166,8 +1162,7 @@ def attr(self, key): Returns ------- value : str - The attribute value of the key, returns None if attribute do not - exist. + The attribute value of the key, returns None if attribute do not exist. """ ret = ctypes.c_char_p() success = ctypes.c_int() @@ -1182,8 +1177,8 @@ def attributes(self): Returns ------- - result : dictionary of attribute_name: attribute_value pairs of - strings. Returns an empty dict if there's no attributes. + result : dictionary of attribute_name: attribute_value pairs of strings. + Returns an empty dict if there's no attributes. """ length = c_bst_ulong() sarr = ctypes.POINTER(ctypes.c_char_p)() @@ -1199,8 +1194,7 @@ def set_attr(self, **kwargs): Parameters ---------- **kwargs - The attributes to set. Setting a value to None deletes an - attribute. + The attributes to set. Setting a value to None deletes an attribute. 
""" for key, value in kwargs.items(): if value is not None: @@ -1273,11 +1267,9 @@ def boost(self, dtrain, grad, hess): """ if len(grad) != len(hess): - raise ValueError('grad / hess length mismatch: {} / {}'.format( - len(grad), len(hess))) + raise ValueError('grad / hess length mismatch: {} / {}'.format(len(grad), len(hess))) if not isinstance(dtrain, DMatrix): - raise TypeError('invalid training matrix: {}'.format( - type(dtrain).__name__)) + raise TypeError('invalid training matrix: {}'.format(type(dtrain).__name__)) self._validate_features(dtrain) _check_call(_LIB.XGBoosterBoostOneIter(self.handle, dtrain.handle, @@ -1627,16 +1619,14 @@ def get_fscore(self, fmap=''): .. note:: Feature importance is defined only for tree boosters - Feature importance is only defined when the decision tree model is - chosen as base learner (`booster=gbtree`). It is not defined for - other base learner types, such as linear learners - (`booster=gblinear`). + Feature importance is only defined when the decision tree model is chosen as base + learner (`booster=gbtree`). It is not defined for other base learner types, such + as linear learners (`booster=gblinear`). .. note:: Zero-importance features will not be included - Keep in mind that this function does not include zero-importance - feature, i.e. those features that have not been used in any split - conditions. + Keep in mind that this function does not include zero-importance feature, i.e. + those features that have not been used in any split conditions. Parameters ---------- @@ -1650,22 +1640,17 @@ def get_score(self, fmap='', importance_type='weight'): """Get feature importance of each feature. Importance type can be defined as: - * 'weight': the number of times a feature is used to split the data - across all trees. + * 'weight': the number of times a feature is used to split the data across all trees. * 'gain': the average gain across all splits the feature is used in. - * 'cover': the average coverage across all splits the feature is used - in. - * 'total_gain': the total gain across all splits the feature is used - in. - * 'total_cover': the total coverage across all splits the feature is - used in. + * 'cover': the average coverage across all splits the feature is used in. + * 'total_gain': the total gain across all splits the feature is used in. + * 'total_cover': the total coverage across all splits the feature is used in. .. note:: Feature importance is defined only for tree boosters - Feature importance is only defined when the decision tree - model is chosen as base learner (`booster=gbtree`). It is - not defined for other base learner types, such as linear - learners (`booster=gblinear`). + Feature importance is only defined when the decision tree model is chosen as base + learner (`booster=gbtree`). It is not defined for other base learner types, such + as linear learners (`booster=gblinear`). Parameters ---------- @@ -1673,17 +1658,13 @@ def get_score(self, fmap='', importance_type='weight'): The name of feature map file. importance_type: str, default 'weight' One of the importance types defined above. 
- """ fmap = os_fspath(fmap) - if getattr(self, 'booster', None) is not None and self.booster not in { - 'gbtree', 'dart'}: - raise ValueError( - 'Feature importance is not defined for Booster type {}' - .format(self.booster)) + if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}: + raise ValueError('Feature importance is not defined for Booster type {}' + .format(self.booster)) - allowed_importance_types = ['weight', 'gain', 'cover', 'total_gain', - 'total_cover'] + allowed_importance_types = ['weight', 'gain', 'cover', 'total_gain', 'total_cover'] if importance_type not in allowed_importance_types: msg = ("importance_type mismatch, got '{}', expected one of " + repr(allowed_importance_types)) @@ -1735,8 +1716,7 @@ def get_score(self, fmap='', importance_type='weight'): if len(arr) == 1: continue - # look for the closing bracket, extract only info within that - # bracket + # look for the closing bracket, extract only info within that bracket fid = arr[1].split(']') # extract gain or cover from string after closing bracket @@ -1763,9 +1743,9 @@ def get_score(self, fmap='', importance_type='weight'): def trees_to_dataframe(self, fmap=''): """Parse a boosted tree model text dump into a pandas DataFrame structure. - This feature is only defined when the decision tree model is chosen as - base learner (`booster in {gbtree, dart}`). It is not defined for other - base learner types, such as linear learners (`booster=gblinear`). + This feature is only defined when the decision tree model is chosen as base + learner (`booster in {gbtree, dart}`). It is not defined for other base learner + types, such as linear learners (`booster=gblinear`). Parameters ---------- @@ -1778,8 +1758,7 @@ def trees_to_dataframe(self, fmap=''): raise Exception(('pandas must be available to use this method.' 'Install pandas before calling again.')) - if getattr(self, 'booster', None) is not None and self.booster not in { - 'gbtree', 'dart'}: + if getattr(self, 'booster', None) is not None and self.booster not in {'gbtree', 'dart'}: raise ValueError('This method is not defined for Booster type {}' .format(self.booster)) @@ -1835,8 +1814,7 @@ def trees_to_dataframe(self, fmap=''): gains.append(float(stats[7])) covers.append(float(stats[9])) - ids = [str(t_id) + '-' + str(n_id) - for t_id, n_id in zip(tree_ids, node_ids)] + ids = [str(t_id) + '-' + str(n_id) for t_id, n_id in zip(tree_ids, node_ids)] df = DataFrame({'Tree': tree_ids, 'Node': node_ids, 'ID': ids, 'Feature': fids, 'Split': splits, 'Yes': y_directs, 'No': n_directs, 'Missing': missings, 'Gain': gains, @@ -1916,6 +1894,5 @@ def get_split_value_histogram(self, feature, fmap='', bins=None, return DataFrame(nph, columns=['SplitValue', 'Count']) if as_pandas and not PANDAS_INSTALLED: sys.stderr.write( - "Returning histogram as ndarray (as_pandas == True, but pandas" - " is not installed).") + "Returning histogram as ndarray (as_pandas == True, but pandas is not installed).") return nph From 2e96a79e9251d6f18e8177d08048f69db05f4b2c Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 11 Feb 2020 11:21:29 +0800 Subject: [PATCH 04/10] Save the metric for now. 
--- src/learner.cc | 26 ++++++++++++++++++++++---- tests/cpp/test_learner.cc | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index ea9d610f50f2..bed9e8f4523e 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -511,10 +511,11 @@ class LearnerImpl : public Learner { bool warn_old_model { false }; if (attributes_.find("count_poisson_max_delta_step") != attributes_.cend()) { // Loading model from < 1.0.0, objective is not saved. - cfg_["max_delta_step"] = attributes_["count_poisson_max_delta_step"]; + cfg_["max_delta_step"] = attributes_.at("count_poisson_max_delta_step"); attributes_.erase("count_poisson_max_delta_step"); - } else { warn_old_model = true; + } else { + warn_old_model = false; } if (attributes_.find("version") != attributes_.cend()) { @@ -533,9 +534,16 @@ class LearnerImpl : public Learner { obj_->LoadConfig(j_obj); attributes_.erase("objective"); } else { - // Similar to JSON model IO, we save the objective. warn_old_model = true; } + if (attributes_.find("metrics") != attributes_.cend()) { + auto metrics_str = attributes_.at("metrics"); + std::vector names { common::Split(metrics_str, ';') }; + attributes_.erase("metrics"); + for (auto const& n : names) { + this->SetParam(kEvalMetric, n); + } + } if (warn_old_model) { LOG(WARNING) << "Loading model from XGBoost < 1.0.0, consider saving it " @@ -584,13 +592,23 @@ class LearnerImpl : public Learner { } } { - // Save the objective. + // Similar to JSON model IO, we save the objective. Json j_obj { Object() }; obj_->SaveConfig(&j_obj); std::string obj_doc; Json::Dump(j_obj, &obj_doc); extra_attr.emplace_back("objective", obj_doc); } + // As of 1.0.0, JVM Package and R Package uses Save/Load model for serialization. + // Remove this part once they are ported to use actual serialization methods. + if (mparam.contain_eval_metrics != 0) { + std::stringstream os; + for (auto& ev : metrics_) { + os << ev->Name() << ";"; + } + extra_attr.emplace_back("metrics", os.str()); + } + fo->Write(&mparam, sizeof(LearnerModelParamLegacy)); fo->Write(tparam_.objective); fo->Write(tparam_.booster); diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index c04bca61e31e..3588f8be30b9 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -180,6 +180,39 @@ TEST(Learner, JsonModelIO) { delete pp_dmat; } +TEST(Learner, BinaryModelIO) { + size_t constexpr kRows = 8; + int32_t constexpr kIters = 4; + auto pp_dmat = CreateDMatrix(kRows, 10, 0); + std::shared_ptr p_dmat {*pp_dmat}; + p_dmat->Info().labels_.Resize(kRows); + + std::unique_ptr learner{Learner::Create({p_dmat})}; + learner->SetParam("eval_metric", "rmsle"); + learner->Configure(); + for (int32_t iter = 0; iter < kIters; ++iter) { + learner->UpdateOneIter(iter, p_dmat.get()); + } + dmlc::TemporaryDirectory tempdir; + std::string const fname = tempdir.path + "binary_model_io.bin"; + { + // Make sure the write is complete before loading. 
+ std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); + learner->SaveModel(fo.get()); + } + + learner.reset(Learner::Create({p_dmat})); + std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r")); + learner->LoadModel(fi.get()); + learner->Configure(); + Json config { Object() }; + learner->SaveConfig(&config); + std::string config_str; + Json::Dump(config, &config_str); + ASSERT_NE(config_str.find("rmsle"), std::string::npos); + ASSERT_EQ(config_str.find("WARNING"), std::string::npos); +} + #if defined(XGBOOST_USE_CUDA) // Tests for automatic GPU configuration. TEST(Learner, GPUConfiguration) { From b7a58d148f624465f53b848351d247bf17fccb36 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 11 Feb 2020 12:05:09 +0800 Subject: [PATCH 05/10] Fix mem leak in test. --- CMakeLists.txt | 2 +- tests/cpp/test_learner.cc | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7075928e577..12f6f06e6421 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,7 @@ option(USE_SANITIZER "Use santizer flags" OFF) option(SANITIZER_PATH "Path to sanitizes.") set(ENABLED_SANITIZERS "address" "leak" CACHE STRING "Semicolon separated list of sanitizer names. E.g 'address;leak'. Supported sanitizers are -address, leak and thread.") +address, leak, undefined and thread.") ## Plugins option(PLUGIN_LZ4 "Build lz4 plugin" OFF) option(PLUGIN_DENSE_PARSER "Build dense parser plugin" OFF) diff --git a/tests/cpp/test_learner.cc b/tests/cpp/test_learner.cc index 3588f8be30b9..21aad10aff93 100644 --- a/tests/cpp/test_learner.cc +++ b/tests/cpp/test_learner.cc @@ -211,6 +211,8 @@ TEST(Learner, BinaryModelIO) { Json::Dump(config, &config_str); ASSERT_NE(config_str.find("rmsle"), std::string::npos); ASSERT_EQ(config_str.find("WARNING"), std::string::npos); + + delete pp_dmat; } #if defined(XGBOOST_USE_CUDA) From 81a60a0a6fac90559aae3b192906585ea6e6cf0d Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Feb 2020 03:14:27 +0800 Subject: [PATCH 06/10] Save version as a binary field. --- src/learner.cc | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index bed9e8f4523e..29ab2442b4ef 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -4,6 +4,8 @@ * \brief Implementation of learning algorithm. * \author Tianqi Chen */ +#include +#include #include #include @@ -67,19 +69,26 @@ struct LearnerModelParamLegacy : public dmlc::Parameter /* \brief global bias */ bst_float base_score; /* \brief number of features */ - unsigned num_feature; + uint32_t num_feature; /* \brief number of classes, if it is multi-class classification */ - int num_class; + int32_t num_class; /*! \brief Model contain additional properties */ - int contain_extra_attrs; + int32_t contain_extra_attrs; /*! \brief Model contain eval metrics */ - int contain_eval_metrics; + int32_t contain_eval_metrics; + /*! \brief the version of XGBoost. */ + uint32_t major_version; + uint32_t minor_version; /*! \brief reserved field */ - int reserved[29]; + int reserved[27]; /*! \brief constructor */ LearnerModelParamLegacy() { std::memset(this, 0, sizeof(LearnerModelParamLegacy)); base_score = 0.5f; + major_version = std::get<0>(Version::Self()); + minor_version = std::get<1>(Version::Self()); + static_assert(sizeof(LearnerModelParamLegacy) == 136, + "Do not change the size of this struct, as it will break binary IO."); } // Skip other legacy fields. 
Json ToJson() const { @@ -487,6 +496,7 @@ class LearnerImpl : public Learner { // read parameter CHECK_EQ(fi->Read(&mparam_, sizeof(mparam_)), sizeof(mparam_)) << "BoostLearner: wrong model format"; + CHECK(fi->Read(&tparam_.objective)) << "BoostLearner: wrong model format"; CHECK(fi->Read(&tparam_.booster)) << "BoostLearner: wrong model format"; @@ -518,10 +528,9 @@ class LearnerImpl : public Learner { warn_old_model = false; } - if (attributes_.find("version") != attributes_.cend()) { + if (mparam_.major_version >= 1) { learner_model_param_ = LearnerModelParam(mparam_, obj_->ProbToMargin(mparam_.base_score)); - attributes_.erase("version"); } else { // Before 1.0.0, base_score is saved as a transformed value, and there's no version // attribute in the saved model. @@ -550,6 +559,10 @@ class LearnerImpl : public Learner { "again for improved compatibility"; } + // Renew the version. + mparam_.major_version = std::get<0>(Version::Self()); + mparam_.minor_version = std::get<1>(Version::Self()); + cfg_["num_class"] = common::ToString(mparam_.num_class); cfg_["num_feature"] = common::ToString(mparam_.num_feature); @@ -573,7 +586,6 @@ class LearnerImpl : public Learner { LearnerModelParamLegacy mparam = mparam_; // make a copy to potentially modify std::vector > extra_attr; mparam.contain_extra_attrs = 1; - extra_attr.emplace_back(std::make_pair("version", Version::String(Version::Self()))); { std::vector saved_params; From 63b2bdfe4d2cf8bab4ea8d0d408280ca72305c0c Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Feb 2020 11:31:46 +0800 Subject: [PATCH 07/10] Fix the auto include. --- src/learner.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/learner.cc b/src/learner.cc index 29ab2442b4ef..e4f1f3de444c 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -4,8 +4,6 @@ * \brief Implementation of learning algorithm. * \author Tianqi Chen */ -#include -#include #include #include From f64686e111026e9ef225fc5c03dc65a6465708bb Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Feb 2020 12:21:14 +0800 Subject: [PATCH 08/10] Typos. --- doc/tutorials/saving_model.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorials/saving_model.rst b/doc/tutorials/saving_model.rst index 7187452774ec..7d416ccb1bbc 100644 --- a/doc/tutorials/saving_model.rst +++ b/doc/tutorials/saving_model.rst @@ -94,8 +94,8 @@ models are valuable. One way to restore it in the future is to load it back wit specific version of Python and XGBoost, export the model by calling `save_model`. To help easing the mitigation, we created a simple script for converting pickled XGBoost 0.90 Scikit-Learn interface object to XGBoost 1.0.0 native model. Please note that the script -suites simple use cases, and it's adviced not to use pickle when stability is needed. -It's located in ``xgboost/doc/ptyhon`` with the name ``convert_090to100.py``. See +suits simple use cases, and it's advised not to use pickle when stability is needed. +It's located in ``xgboost/doc/python`` with the name ``convert_090to100.py``. See comments in the script for more details. ******************************************************** From 17dccfe90753f0b9fc8cdbe13eaea83c1aafff13 Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Feb 2020 12:33:41 +0800 Subject: [PATCH 09/10] Make the argument as required. 
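For reference, the binary header extended a few patches back now carries the version directly, and the field widths in LearnerModelParamLegacy pin the struct to 136 bytes, which the static_assert guards. A quick sanity check of that layout, sketched with Python's struct module (the format string is a hand-written mirror of the C++ declaration, not something XGBoost exposes):

    import struct

    # float base_score; uint32 num_feature; int32 num_class;
    # int32 contain_extra_attrs; int32 contain_eval_metrics;
    # uint32 major_version; uint32 minor_version; int32 reserved[27]
    LEGACY_PARAM_FORMAT = '<fIiiiII27i'

    # 7 * 4 + 27 * 4 = 136 bytes, matching the static_assert in learner.cc.
    assert struct.calcsize(LEGACY_PARAM_FORMAT) == 136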
--- doc/python/convert_090to100.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/python/convert_090to100.py b/doc/python/convert_090to100.py index 21aa0fcdc7db..70f215f5a368 100644 --- a/doc/python/convert_090to100.py +++ b/doc/python/convert_090to100.py @@ -70,7 +70,8 @@ def xgboost_skl_90to100(skl_model): ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).') ) parser.add_argument('--old-pickle', type=str, - help='Path to old pickle file.') + help='Path to old pickle file.', + required=True) args = parser.parse_args() xgboost_skl_90to100(args.old_pickle) From f482c9dd0800713f4d8b849afecfc235ffa8324a Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 12 Feb 2020 12:37:45 +0800 Subject: [PATCH 10/10] Better name. --- doc/python/convert_090to100.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/python/convert_090to100.py b/doc/python/convert_090to100.py index 70f215f5a368..135489b09d36 100644 --- a/doc/python/convert_090to100.py +++ b/doc/python/convert_090to100.py @@ -54,7 +54,7 @@ def xgboost_skl_90to100(skl_model): # Save it into a native model. i = 0 while True: - path = str(i) + '_xgboost_model_from_old_pickle.model' + path = 'xgboost_native_model_from_' + skl_model + '-' + str(i) + '.bin' if os.path.exists(path): i += 1 continue @@ -67,11 +67,13 @@ def xgboost_skl_90to100(skl_model): ' that generates this pickle.') parser = argparse.ArgumentParser( description=('A simple script to convert pickle generated by' - ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).') - ) - parser.add_argument('--old-pickle', type=str, - help='Path to old pickle file.', - required=True) + ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).')) + parser.add_argument( + '--old-pickle', + type=str, + help='Path to old pickle file of Scikit-Learn interface object. ' + 'Will output a native model converted from this pickle file', + required=True) args = parser.parse_args() xgboost_skl_90to100(args.old_pickle)
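With --old-pickle now required and the output name derived from the input, a typical session runs the converter first and then loads the emitted native file. A minimal sketch, where the file names are examples of the new naming pattern rather than fixed outputs:

    import xgboost

    # First run the converter, e.g.
    #   python doc/python/convert_090to100.py --old-pickle old_skl_model.pkl
    # which writes a file such as 'xgboost_native_model_from_old_skl_model.pkl-0.bin'.
    booster = xgboost.Booster()
    booster.load_model('xgboost_native_model_from_old_skl_model.pkl-0.bin')
    print(booster.attributes())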