From 47cbee22af0c789c394c550716b276871173a614 Mon Sep 17 00:00:00 2001
From: Xavier Dupre
Date: Mon, 11 Dec 2023 14:41:17 +0100
Subject: [PATCH] more fixes

Signed-off-by: Xavier Dupre
---
 onnxmltools/convert/xgboost/_parse.py              |  3 ++-
 onnxmltools/convert/xgboost/common.py              | 11 ++++++-----
 .../convert/xgboost/operator_converters/XGBoost.py | 13 ++++++++++---
 .../xgboost/shape_calculators/Classifier.py        |  8 +++++++-
 onnxmltools/utils/utils_backend.py                 |  2 ++
 tests/xgboost/test_xgboost_converters.py           |  1 +
 6 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/onnxmltools/convert/xgboost/_parse.py b/onnxmltools/convert/xgboost/_parse.py
index bb200307..89b1a37a 100644
--- a/onnxmltools/convert/xgboost/_parse.py
+++ b/onnxmltools/convert/xgboost/_parse.py
@@ -72,7 +72,8 @@ def _get_attributes(booster):
     config = json.loads(booster.save_config())["learner"]["learner_model_param"]
     if "num_class" in config:
         num_class = int(config["num_class"])
-        ntrees = len(res) // num_class
+        ntrees = len(res)
+        num_class = 1
     else:
         trees = len(res)
         if hasattr(booster, "best_ntree_limit"):
diff --git a/onnxmltools/convert/xgboost/common.py b/onnxmltools/convert/xgboost/common.py
index bfe93955..1be57c4e 100644
--- a/onnxmltools/convert/xgboost/common.py
+++ b/onnxmltools/convert/xgboost/common.py
@@ -16,18 +16,19 @@ def get_xgb_params(xgb_node):
     else:
         # XGBoost < 0.7
         params = xgb_node.__dict__
+    if hasattr(xgb_node, "save_config"):
+        config = json.loads(xgb_node.save_config())
+    else:
+        config = json.loads(xgb_node.get_booster().save_config())
+    num_class = int(config["learner"]["learner_model_param"]["num_class"])
     params = {k: v for k, v in params.items() if v is not None}
+    params["num_class"] = num_class
     if "n_estimators" not in params and hasattr(xgb_node, "n_estimators"):
         # xgboost >= 1.0.2
         if xgb_node.n_estimators is not None:
             params["n_estimators"] = xgb_node.n_estimators
     if params.get("base_score", None) is None:
         # xgboost >= 2.0
-        if hasattr("xgb_node", "save_config"):
-            config = json.loads(xgb_node.save_config())
-        else:
-            config = json.loads(xgb_node.get_booster().save_config())
-
         params["base_score"] = float(
             config["learner"]["learner_model_param"]["base_score"]
         )
diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py
index c48ac73c..ee98539e 100644
--- a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py
+++ b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py
@@ -299,7 +299,11 @@ def convert(scope, operator, container):
         XGBConverter.fill_tree_attributes(
             js_trees, attr_pairs, [1 for _ in js_trees], True
         )
-        ncl = (max(attr_pairs["class_treeids"]) + 1) // n_estimators
+        if params.get("num_class", 0) > 0:
+            ncl = params["num_class"]
+            n_estimators = len(js_trees) // ncl
+        else:
+            ncl = (max(attr_pairs["class_treeids"]) + 1) // n_estimators

         bst = xgb_node.get_booster()
         best_ntree_limit = getattr(bst, "best_ntree_limit", len(js_trees)) * ncl
@@ -312,6 +316,7 @@

         if len(attr_pairs["class_treeids"]) == 0:
             raise RuntimeError("XGBoost model is empty.")
+
         if ncl <= 1:
             ncl = 2
             if objective != "binary:hinge":
@@ -332,8 +337,10 @@
         attr_pairs["class_ids"] = [v % ncl for v in attr_pairs["class_treeids"]]

         classes = xgb_node.classes_
-        if np.issubdtype(classes.dtype, np.floating) or np.issubdtype(
-            classes.dtype, np.integer
+        if (
+            np.issubdtype(classes.dtype, np.floating)
+            or np.issubdtype(classes.dtype, np.integer)
+            or np.issubdtype(classes.dtype, np.bool_)
         ):
             attr_pairs["classlabels_int64s"] = classes.astype("int")
         else:
diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py
index 87b3990b..50095fea 100644
--- a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py
+++ b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0

+import json
 import numpy as np
 from ...common._registration import register_shape_calculator
 from ...common.utils import check_input_and_output_numbers, check_input_and_output_types
@@ -26,8 +27,13 @@ def calculate_xgboost_classifier_output_shapes(operator):
     ntrees = len(js_trees)
     objective = params["objective"]
     n_estimators = get_n_estimators_classifier(xgb_node, params, js_trees)
+    config = json.loads(xgb_node.get_booster().save_config())
+    num_class = int(config["learner"]["learner_model_param"]["num_class"])

-    if objective == "binary:logistic":
+    if num_class > 0:
+        ncl = num_class
+        n_estimators = ntrees // ncl
+    elif objective == "binary:logistic":
         ncl = 2
     else:
         ncl = ntrees // n_estimators
diff --git a/onnxmltools/utils/utils_backend.py b/onnxmltools/utils/utils_backend.py
index 84dbe146..f7ab5b42 100644
--- a/onnxmltools/utils/utils_backend.py
+++ b/onnxmltools/utils/utils_backend.py
@@ -188,6 +188,8 @@ def compare_outputs(expected, output, **kwargs):
     Disc = kwargs.pop("Disc", False)
     Mism = kwargs.pop("Mism", False)
     Opp = kwargs.pop("Opp", False)
+    if hasattr(expected, "dtype") and expected.dtype == numpy.bool_:
+        expected = expected.astype(numpy.int64)

     if Opp and not NoProb:
         raise ValueError("Opp is only available if NoProb is True")
diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py
index 73d5cf0e..e6dd15de 100644
--- a/tests/xgboost/test_xgboost_converters.py
+++ b/tests/xgboost/test_xgboost_converters.py
@@ -677,4 +677,5 @@ def test_xgb_classifier_hinge(self):


 if __name__ == "__main__":
+    TestXGBoostModels().test_xgb_best_tree_limit()
     unittest.main(verbosity=2)
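
A quick way to exercise the multiclass path this patch touches (a minimal
sketch, not part of the commit; the 4-feature/3-class dataset and the tensor
name "input" are arbitrary choices):

    # Converts a multiclass XGBClassifier and compares ONNX predictions with
    # xgboost's own; save_config() reports num_class=3 for this model, which
    # is what get_xgb_params() now surfaces to the converter.
    import numpy as np
    import onnxruntime as rt
    from xgboost import XGBClassifier
    from onnxmltools import convert_xgboost
    from onnxmltools.convert.common.data_types import FloatTensorType

    X = np.random.rand(100, 4).astype(np.float32)
    y = np.random.randint(0, 3, size=100)  # 3 classes -> num_class=3

    model = XGBClassifier(n_estimators=5, max_depth=2)
    model.fit(X, y)

    onx = convert_xgboost(
        model, initial_types=[("input", FloatTensorType([None, 4]))]
    )
    sess = rt.InferenceSession(
        onx.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    labels, _ = sess.run(None, {"input": X[:10]})
    print("onnx:", labels)
    print("xgb :", model.predict(X[:10]))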