diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py index 6f3fdc2e..89b93e3b 100644 --- a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py +++ b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py @@ -242,7 +242,7 @@ def convert(scope, operator, container): classes = xgb_node.classes_ if (np.issubdtype(classes.dtype, np.floating) or - np.issubdtype(classes.dtype, np.signedinteger)): + np.issubdtype(classes.dtype, np.integer)): attr_pairs['classlabels_int64s'] = classes.astype('int') else: classes = np.array([s.encode('utf-8') for s in classes]) diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py index 0a80d7c9..144187ba 100644 --- a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py +++ b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py @@ -34,7 +34,7 @@ def calculate_xgboost_classifier_output_shapes(operator): ncl = 2 classes = xgb_node.classes_ if (np.issubdtype(classes.dtype, np.floating) or - np.issubdtype(classes.dtype, np.signedinteger)): + np.issubdtype(classes.dtype, np.integer)): operator.outputs[0].type = Int64TensorType(shape=[N]) else: operator.outputs[0].type = StringTensorType(shape=[N]) diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py index fa5aa780..9d6d48f1 100644 --- a/tests/xgboost/test_xgboost_converters.py +++ b/tests/xgboost/test_xgboost_converters.py @@ -6,21 +6,25 @@ import unittest import numpy as np import pandas -from sklearn.datasets import load_diabetes, load_iris, make_classification +from sklearn.datasets import ( + load_diabetes, load_iris, make_classification, load_digits) from sklearn.model_selection import train_test_split from xgboost import XGBRegressor, XGBClassifier, train, DMatrix +from sklearn.preprocessing import StandardScaler from onnxmltools.convert import convert_xgboost from onnxmltools.convert.common.data_types import FloatTensorType from onnxmltools.utils import dump_data_and_model -def _fit_classification_model(model, n_classes, is_str=False): +def _fit_classification_model(model, n_classes, is_str=False, dtype=None): x, y = make_classification(n_classes=n_classes, n_features=100, n_samples=1000, random_state=42, n_informative=7) y = y.astype(np.str) if is_str else y.astype(np.int64) x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5, random_state=42) + if dtype is not None: + y_train = y_train.astype(dtype) model.fit(x_train, y_train) return model, x_test.astype(np.float32) @@ -67,6 +71,24 @@ def test_xgb_classifier(self): "< StrictVersion('1.3.0')", ) + @unittest.skipIf(sys.version_info[0] == 2, + reason="xgboost converter not tested on python 2") + def test_xgb_classifier_uint8(self): + xgb, x_test = _fit_classification_model( + XGBClassifier(), 2, dtype=np.uint8) + conv_model = convert_xgboost( + xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))]) + self.assertTrue(conv_model is not None) + dump_data_and_model( + x_test, + xgb, + conv_model, + basename="SklearnXGBClassifier", + allow_failure="StrictVersion(" + "onnx.__version__)" + "< StrictVersion('1.3.0')", + ) + @unittest.skipIf(sys.version_info[0] == 2, reason="xgboost converter not tested on python 2") def test_xgb_classifier_multi(self): @@ -260,6 +282,30 @@ def test_xgboost_10(self): allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", basename="XGBBoosterRegBug") + def test_xgboost_example_mnist(self): + """ + Train a simple xgboost model and store associated artefacts. + """ + X, y = load_digits(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y) + X_train = X_train.reshape((X_train.shape[0], -1)) + X_test = X_test.reshape((X_test.shape[0], -1)) + + scaler = StandardScaler() + X_train = scaler.fit_transform(X_train) + X_test = scaler.transform(X_test) + clf = XGBClassifier(objective="multi:softprob", n_jobs=-1) + clf.fit(X_train, y_train) + + sh = [None, X_train.shape[1]] + onnx_model = convert_xgboost( + clf, initial_types=[('input', FloatTensorType(sh))]) + + dump_data_and_model( + X_test.astype(np.float32), clf, onnx_model, + allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", + basename="XGBoostExample") + if __name__ == "__main__": unittest.main()