Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace #507 + fix bug with XGBoost converter when base_score is None #510

Merged
merged 11 commits
Oct 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .azure-pipelines/win32-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ jobs:
python -m pip install -r requirements-dev.txt
displayName: 'Install dependencies-dev'

- script: |
call activate py$(python.version)
python -m pip install --upgrade scikit-learn
displayName: 'Install scikit-learn'

- script: |
call activate py$(python.version)
python -m pip install %COREML_PATH%
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ input2 = Input(shape=(D,))
mapped1_2 = sub_model1(input1)
mapped2_2 = sub_model2(input2)
sub_sum = Add()([mapped1_2, mapped2_2])
keras_model = Model(inputs=[input1, input2], output=sub_sum)
keras_model = Model(inputs=[input1, input2], outputs=sub_sum)

# Convert it! The target_opset parameter is optional.
onnx_model = onnxmltools.convert_keras(keras_model, target_opset=7)
Expand Down
2 changes: 2 additions & 0 deletions onnxmltools/convert/xgboost/operator_converters/XGBoost.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def common_members(xgb_node, inputs):
params = XGBConverter.get_xgb_params(xgb_node)
objective = params["objective"]
base_score = params["base_score"]
if base_score is None:
base_score = 0.5
booster = xgb_node.get_booster()
# The json format was available in October 2017.
# XGBoost 0.7 was the first version released with it.
Expand Down
22 changes: 16 additions & 6 deletions tests/catboost/test_CatBoost_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,25 @@
Tests for CatBoostRegressor and CatBoostClassifier converter.
"""
import unittest
import numpy
import warnings
import catboost

from sklearn.datasets import make_regression, make_classification
from distutils.version import StrictVersion
import numpy
try:
import sklearn
from sklearn.datasets import make_regression, make_classification
except (ImportError, FileNotFoundError):
sklearn = None
try:
import catboost
except (ImportError, FileNotFoundError):
catboost = None
from onnxmltools.convert import convert_catboost
from onnxmltools.utils import dump_data_and_model, dump_single_regression, dump_multiple_classification


class TestCatBoost(unittest.TestCase):

@unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported")
def test_catboost_regressor(self):
X, y = make_regression(n_samples=100, n_features=4, random_state=0)
catboost_model = catboost.CatBoostRegressor(task_type='CPU', loss_function='RMSE',
Expand All @@ -26,11 +35,11 @@ def test_catboost_regressor(self):
self.assertTrue(catboost_onnx is not None)
dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostReg-Dec4")

@unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported")
def test_catboost_bin_classifier(self):
import onnxruntime
from distutils.version import StrictVersion

if StrictVersion(onnxruntime.__version__) >= StrictVersion('1.3.0'):
if StrictVersion('.'.join(onnxruntime.__version__.split('.')[:2])) >= StrictVersion('1.3.0'):
X, y = make_classification(n_samples=100, n_features=4, random_state=0)
catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='CrossEntropy',
n_estimators=10, verbose=0)
Expand All @@ -45,6 +54,7 @@ def test_catboost_bin_classifier(self):
warnings.warn('Converted CatBoost models for binary classification work with onnxruntime version 1.3.0 or '
'a newer one')

@unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported")
def test_catboost_multi_classifier(self):
X, y = make_classification(n_samples=10, n_informative=8, n_classes=3, random_state=0)
catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='MultiClass',
Expand Down
6 changes: 3 additions & 3 deletions tests/h2o/test_h2o_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def test_h2o_unsupported_algo(self):
mojo_path, test_data = _train_classifier(gbm, 2, is_str=True)
with self.assertRaises(ValueError) as err:
_convert_mojo(mojo_path)
self.assertRegexpMatches(err.exception.args[0], "not supported")
self.assertRegex(err.exception.args[0], "not supported")

def test_h2o_regressor_unsupported_dists(self):
diabetes = load_diabetes()
Expand All @@ -169,7 +169,7 @@ def test_h2o_regressor_unsupported_dists(self):
mojo_path = _make_mojo(gbm, train)
with self.assertRaises(ValueError) as err:
_convert_mojo(mojo_path)
self.assertRegexpMatches(err.exception.args[0], "not supported")
self.assertRegex(err.exception.args[0], "not supported")

def test_h2o_regressor(self):
diabetes = load_diabetes()
Expand Down Expand Up @@ -202,7 +202,7 @@ def test_h2o_classifier_multi_2class(self):
mojo_path, test_data = _train_classifier(gbm, 2, is_str=True)
with self.assertRaises(ValueError) as err:
_convert_mojo(mojo_path)
self.assertRegexpMatches(err.exception.args[0], "not supported")
self.assertRegex(err.exception.args[0], "not supported")

def test_h2o_classifier_bin_cat(self):
y = "IsDepDelayed_REC"
Expand Down
107 changes: 24 additions & 83 deletions tests/xgboost/test_xgboost_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,8 @@ def test_xgb_regressor(self):
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test.astype("float32"),
xgb,
conv_model,
basename="SklearnXGBRegressor-Dec3",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
xgb, conv_model,
basename="SklearnXGBRegressor-Dec3")

def test_xgb_classifier(self):
xgb, x_test = _fit_classification_model(XGBClassifier(), 2)
Expand All @@ -68,14 +63,8 @@ def test_xgb_classifier(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifier",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifier")

def test_xgb_classifier_uint8(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -85,14 +74,8 @@ def test_xgb_classifier_uint8(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifier",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifier")

def test_xgb_classifier_multi(self):
xgb, x_test = _fit_classification_model(XGBClassifier(), 3)
Expand All @@ -101,14 +84,8 @@ def test_xgb_classifier_multi(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierMulti",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierMulti")

def test_xgb_classifier_multi_reglog(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -118,14 +95,8 @@ def test_xgb_classifier_multi_reglog(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierMultiRegLog",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierMultiRegLog")

def test_xgb_classifier_reglog(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -135,14 +106,8 @@ def test_xgb_classifier_reglog(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierRegLog",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierRegLog")

def test_xgb_classifier_multi_str_labels(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -152,14 +117,8 @@ def test_xgb_classifier_multi_str_labels(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierMultiStrLabels",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierMultiStrLabels")

def test_xgb_classifier_multi_discrete_int_labels(self):
iris = load_iris()
Expand All @@ -180,13 +139,8 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test.astype("float32"),
xgb,
conv_model,
basename="SklearnXGBClassifierMultiDiscreteIntLabels",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
xgb, conv_model,
basename="SklearnXGBClassifierMultiDiscreteIntLabels")

def test_xgboost_booster_classifier_bin(self):
x, y = make_classification(n_classes=2, n_features=5,
Expand All @@ -202,9 +156,7 @@ def test_xgboost_booster_classifier_bin(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterMCl")
model, model_onnx, basename="XGBBoosterMCl")

def test_xgboost_booster_classifier_multiclass_softprob(self):
x, y = make_classification(n_classes=3, n_features=5,
Expand All @@ -221,9 +173,7 @@ def test_xgboost_booster_classifier_multiclass_softprob(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterMClSoftProb")
model, model_onnx, basename="XGBBoosterMClSoftProb")

def test_xgboost_booster_classifier_multiclass_softmax(self):
x, y = make_classification(n_classes=3, n_features=5,
Expand All @@ -240,9 +190,7 @@ def test_xgboost_booster_classifier_multiclass_softmax(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterMClSoftMax")
model, model_onnx, basename="XGBBoosterMClSoftMax")

def test_xgboost_booster_classifier_reg(self):
x, y = make_classification(n_classes=2, n_features=5,
Expand All @@ -259,9 +207,7 @@ def test_xgboost_booster_classifier_reg(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterReg")
model, model_onnx, basename="XGBBoosterReg")

def test_xgboost_10(self):
this = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -279,9 +225,9 @@ def test_xgboost_10(self):
}

train_df = pandas.read_csv(train)
X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].values
X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].fillna(0).values
test_df = pandas.read_csv(test)
X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].values
X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].fillna(0).values

regressor = XGBRegressor(verbose=0, objective='reg:squarederror', **param_distributions)
regressor.fit(X_train, y_train)
Expand All @@ -292,9 +238,7 @@ def test_xgboost_10(self):
target_opset=TARGET_OPSET)

dump_data_and_model(
X_test.astype(np.float32),
regressor, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
X_test.astype(np.float32), regressor, model_onnx,
basename="XGBBoosterRegBug")

def test_xgboost_classifier_i5450(self):
Expand All @@ -315,9 +259,7 @@ def test_xgboost_classifier_i5450(self):
bst = clr.get_booster()
bst.dump_model('dump.raw.txt')
dump_data_and_model(
X_test.astype(np.float32) + 1e-5,
clr, onx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
X_test.astype(np.float32) + 1e-5, clr, onx,
basename="XGBClassifierIris")

def test_xgboost_example_mnist(self):
Expand All @@ -342,7 +284,6 @@ def test_xgboost_example_mnist(self):

dump_data_and_model(
X_test.astype(np.float32), clf, onnx_model,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBoostExample")

def test_xgb_empty_tree(self):
Expand Down