Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #421, support unsigned integer as class type #426

Merged
merged 4 commits into from
Jan 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion onnxmltools/convert/xgboost/operator_converters/XGBoost.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def convert(scope, operator, container):

classes = xgb_node.classes_
if (np.issubdtype(classes.dtype, np.floating) or
np.issubdtype(classes.dtype, np.signedinteger)):
np.issubdtype(classes.dtype, np.integer)):
attr_pairs['classlabels_int64s'] = classes.astype('int')
else:
classes = np.array([s.encode('utf-8') for s in classes])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def calculate_xgboost_classifier_output_shapes(operator):
ncl = 2
classes = xgb_node.classes_
if (np.issubdtype(classes.dtype, np.floating) or
np.issubdtype(classes.dtype, np.signedinteger)):
np.issubdtype(classes.dtype, np.integer)):
operator.outputs[0].type = Int64TensorType(shape=[N])
else:
operator.outputs[0].type = StringTensorType(shape=[N])
Expand Down
50 changes: 48 additions & 2 deletions tests/xgboost/test_xgboost_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,25 @@
import unittest
import numpy as np
import pandas
from sklearn.datasets import load_diabetes, load_iris, make_classification
from sklearn.datasets import (
load_diabetes, load_iris, make_classification, load_digits)
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier, train, DMatrix
from sklearn.preprocessing import StandardScaler
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert.common.data_types import FloatTensorType
from onnxmltools.utils import dump_data_and_model


def _fit_classification_model(model, n_classes, is_str=False, dtype=None):
    """Fit *model* on a synthetic classification problem.

    The data comes from ``make_classification`` (1000 samples, 100 features,
    7 informative) and is split in half into train and test parts; both
    steps use ``random_state=42``.

    :param model: estimator exposing ``fit(X, y)``
    :param n_classes: number of classes to generate
    :param is_str: if True, labels are converted to strings, otherwise
        to ``np.int64``
    :param dtype: optional dtype the training labels are cast to just
        before fitting (e.g. ``np.uint8``); ``None`` leaves them unchanged
    :return: tuple ``(fitted model, test features as float32)``
    """
    x, y = make_classification(n_classes=n_classes, n_features=100,
                               n_samples=1000,
                               random_state=42, n_informative=7)
    # ``np.str`` was a plain alias of the builtin ``str``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so use ``str`` directly.
    y = y.astype(str) if is_str else y.astype(np.int64)
    x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5,
                                                   random_state=42)
    if dtype is not None:
        y_train = y_train.astype(dtype)
    model.fit(x_train, y_train)
    return model, x_test.astype(np.float32)

Expand Down Expand Up @@ -67,6 +71,24 @@ def test_xgb_classifier(self):
"< StrictVersion('1.3.0')",
)

@unittest.skipIf(sys.version_info[0] == 2,
                 reason="xgboost converter not tested on python 2")
def test_xgb_classifier_uint8(self):
    """Convert a binary XGBClassifier whose training labels are ``uint8``."""
    fitted, features = _fit_classification_model(
        XGBClassifier(), 2, dtype=np.uint8)

    input_spec = [('input', FloatTensorType(shape=['None', 'None']))]
    onnx_model = convert_xgboost(fitted, initial_types=input_spec)
    self.assertTrue(onnx_model is not None)

    # Expression string handed to dump_data_and_model as ``allow_failure``.
    failure_rule = "StrictVersion(onnx.__version__)< StrictVersion('1.3.0')"
    dump_data_and_model(
        features,
        fitted,
        onnx_model,
        basename="SklearnXGBClassifier",
        allow_failure=failure_rule,
    )

@unittest.skipIf(sys.version_info[0] == 2,
reason="xgboost converter not tested on python 2")
def test_xgb_classifier_multi(self):
Expand Down Expand Up @@ -260,6 +282,30 @@ def test_xgboost_10(self):
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterRegBug")

def test_xgboost_example_mnist(self):
    """
    Train an XGBClassifier (``multi:softprob``) on the scikit-learn
    digits dataset, convert it to ONNX and pass the scaled test data,
    the model and its conversion to ``dump_data_and_model``.
    """
    X, y = load_digits(return_X_y=True)
    # Seed the split so the test sees the same data on every run,
    # matching the fixed random_state used by _fit_classification_model.
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=42)
    # Keep one row per sample, collapsing any trailing dimensions.
    X_train = X_train.reshape((X_train.shape[0], -1))
    X_test = X_test.reshape((X_test.shape[0], -1))

    # Fit the scaler on the training part only, then reuse it for the test part.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    clf = XGBClassifier(objective="multi:softprob", n_jobs=-1)
    clf.fit(X_train, y_train)

    # Unknown batch size, fixed number of features.
    sh = [None, X_train.shape[1]]
    onnx_model = convert_xgboost(
        clf, initial_types=[('input', FloatTensorType(sh))])

    dump_data_and_model(
        X_test.astype(np.float32), clf, onnx_model,
        allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
        basename="XGBoostExample")


# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()