From bef5e2503eda89147cfa6d08defc8605c2957e8c Mon Sep 17 00:00:00 2001 From: Anna Volodkevich Date: Fri, 15 May 2020 04:37:31 -0700 Subject: [PATCH 1/4] catboost converter --- README.md | 4 +- onnxmltools/__init__.py | 1 + onnxmltools/convert/__init__.py | 1 + onnxmltools/convert/main.py | 23 ++++++++++ onnxmltools/utils/tests_helper.py | 3 ++ tests/catboost/test_CatBoost_converter.py | 53 +++++++++++++++++++++++ 6 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/catboost/test_CatBoost_converter.py diff --git a/README.md b/README.md index f18f28d2..624c62df 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ ONNXMLTools enables you to convert models from different machine learning toolki * libsvm * XGBoost * H2O +* CatBoost

PyTorch has its own built-in ONNX exporter; check here for details

## Install @@ -31,7 +32,7 @@ pip install git+https://github.com/onnx/onnxmltools If you choose to install `onnxmltools` from its source code, you must set the environment variable `ONNX_ML=1` before installing the `onnx` package. ## Dependencies -This package relies on ONNX, NumPy, and ProtoBuf. If you are converting a model from scikit-learn, Core ML, Keras, LightGBM, SparkML, XGBoost, H2O or LibSVM, you will need an environment with the respective package installed from the list below: +This package relies on ONNX, NumPy, and ProtoBuf. If you are converting a model from scikit-learn, Core ML, Keras, LightGBM, SparkML, XGBoost, H2O, CatBoost or LibSVM, you will need an environment with the respective package installed from the list below: 1. scikit-learn 2. CoreMLTools 3. Keras (version 2.0.8 or higher) with the corresponding Tensorflow version @@ -40,6 +41,7 @@ This package relies on ONNX, NumPy, and ProtoBuf. If you are converting a model 6. XGBoost (scikit-learn interface) 7. libsvm 8. H2O +9. CatBoost ONNXMLTools has been tested with Python **3.5**, **3.6**, and **3.7**. Version 1.6.1 is the latest version supporting Python 2.7. 
diff --git a/onnxmltools/__init__.py b/onnxmltools/__init__.py index 659f96a5..9f31c18b 100644 --- a/onnxmltools/__init__.py +++ b/onnxmltools/__init__.py @@ -25,6 +25,7 @@ from .convert import convert_tensorflow from .convert import convert_xgboost from .convert import convert_h2o +from .convert import convert_catboost from .utils import load_model from .utils import save_model diff --git a/onnxmltools/convert/__init__.py b/onnxmltools/convert/__init__.py index 22288f4d..fb72b3d2 100644 --- a/onnxmltools/convert/__init__.py +++ b/onnxmltools/convert/__init__.py @@ -13,3 +13,4 @@ from .main import convert_tensorflow from .main import convert_xgboost from .main import convert_h2o +from .main import convert_catboost diff --git a/onnxmltools/convert/main.py b/onnxmltools/convert/main.py index da18b1fd..53dfec8b 100644 --- a/onnxmltools/convert/main.py +++ b/onnxmltools/convert/main.py @@ -44,6 +44,29 @@ def convert_libsvm(model, name=None, initial_types=None, doc_string='', target_o custom_conversion_functions, custom_shape_calculators) +def convert_catboost(model, name=None, initial_types=None, doc_string='', target_opset=None, + targeted_onnx=onnx.__version__, custom_conversion_functions=None, custom_shape_calculators=None): + try: + from catboost.utils import convert_to_onnx_object + except ImportError: + raise RuntimeError('CatBoost is not installed or need to be updated. ' + 'Please install/upgrade CatBoost to use this feature.') + + if custom_conversion_functions: + warnings.warn('custom_conversion_functions is not supported. Please set it to None.') + if custom_shape_calculators: + warnings.warn('custom_shape_calculators is not supported. 
Please set it to None.') + + export_parameters = { + 'onnx_domain': 'ai.catboost', + 'onnx_model_version': 0, + 'onnx_doc_string': doc_string, + 'onnx_graph_name': name + } + + return convert_to_onnx_object(model, export_parameters=export_parameters) + + def convert_lightgbm(model, name=None, initial_types=None, doc_string='', target_opset=None, targeted_onnx=onnx.__version__, custom_conversion_functions=None, custom_shape_calculators=None): if not utils.lightgbm_installed(): diff --git a/onnxmltools/utils/tests_helper.py b/onnxmltools/utils/tests_helper.py index 0a918200..02f05a96 100644 --- a/onnxmltools/utils/tests_helper.py +++ b/onnxmltools/utils/tests_helper.py @@ -212,6 +212,9 @@ def convert_model(model, name, input_types): model, prefix = convert_lightgbm(model, name, input_types), "LightGbm" else: raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model))) + elif model.__class__.__name__.startswith("Cat"): + from onnxmltools.convert import convert_catboost + model, prefix = convert_catboost(model, name, input_types), "Cat" elif isinstance(model, BaseEstimator): from onnxmltools.convert import convert_sklearn model, prefix = convert_sklearn(model, name, input_types), "Sklearn" diff --git a/tests/catboost/test_CatBoost_converter.py b/tests/catboost/test_CatBoost_converter.py new file mode 100644 index 00000000..76f6c419 --- /dev/null +++ b/tests/catboost/test_CatBoost_converter.py @@ -0,0 +1,53 @@ +""" +Tests for CatBoostRegressor and CatBoostClassifier converter. 
+""" +import unittest +import numpy +import catboost +from sklearn.datasets import make_regression, make_classification +from onnxmltools.convert import convert_catboost +from onnxmltools.utils import dump_data_and_model, dump_single_regression, dump_multiple_classification + + +class TestCatBoost(unittest.TestCase): + def test_catboost_regressor(self): + X, y = make_regression(n_samples=100, n_features=4, random_state=0) + catboost_model = catboost.CatBoostRegressor(task_type='CPU', loss_function='RMSE', + n_estimators=10, verbose=0) + dump_single_regression(catboost_model) + + catboost_model.fit(X.astype(numpy.float32), y) + catboost_onnx = convert_catboost(catboost_model, name='CatBoostRegression', + doc_string='test regression') + self.assertTrue(catboost_onnx is not None) + dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostReg-Dec4") + + def test_catboost_bin_classifier(self): + X, y = make_classification(n_samples=100, n_features=4, random_state=0) + catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='CrossEntropy', + n_estimators=10, verbose=0) + + catboost_model.fit(X.astype(numpy.float32), y) + + catboost_onnx = convert_catboost(catboost_model, name='CatBoostBinClassification', + doc_string='test binary classification') + self.assertTrue(catboost_onnx is not None) + # onnx runtime returns zeros as class labels + # dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostBinClass") + + def test_catboost_multi_classifier(self): + X, y = make_classification(n_samples=10, n_informative=8, n_classes=3, random_state=0) + catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='MultiClass', n_estimators=100, + verbose=0) + + dump_multiple_classification(catboost_model) + + catboost_model.fit(X.astype(numpy.float32), y) + catboost_onnx = convert_catboost(catboost_model, name='CatBoostMultiClassification', + doc_string='test multiclass 
classification') + self.assertTrue(catboost_onnx is not None) + dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostMultiClass") + + +if __name__ == "__main__": + unittest.main() From eb80ab1504bb10cb38daaeda37c5b9e698c543ef Mon Sep 17 00:00:00 2001 From: Anna Volodkevich Date: Fri, 15 May 2020 04:55:21 -0700 Subject: [PATCH 2/4] requirements updated --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index a8b53aa8..dda73f23 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,3 +15,4 @@ scipy svm wheel xgboost +catboost From 59f26838ccb8033bc1af376fe1631e5c94d108c6 Mon Sep 17 00:00:00 2001 From: monkey0head Date: Wed, 27 May 2020 04:44:19 -0700 Subject: [PATCH 3/4] fixes --- .azure-pipelines/linux-CI-nightly.yml | 2 +- .azure-pipelines/linux-conda-CI.yml | 5 +++++ .azure-pipelines/win32-CI-nightly.yml | 2 +- .azure-pipelines/win32-conda-CI.yml | 7 +++++++ onnxmltools/convert/main.py | 20 ++++---------------- onnxmltools/utils/tests_helper.py | 4 ++-- requirements-dev.txt | 2 +- tests/catboost/test_CatBoost_converter.py | 8 ++++---- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.azure-pipelines/linux-CI-nightly.yml b/.azure-pipelines/linux-CI-nightly.yml index 739c1784..e580e3f1 100644 --- a/.azure-pipelines/linux-CI-nightly.yml +++ b/.azure-pipelines/linux-CI-nightly.yml @@ -15,7 +15,7 @@ jobs: matrix: Python36-nightly: python.version: '3.6' - ONNX_PATH: onnx==1.6.0 + ONNX_PATH: onnx==1.7.0 ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly maxParallel: 3 diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index 9248023f..06094828 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -33,6 +33,11 @@ jobs: ONNX_PATH: onnx==1.6.0 ONNXRT_PATH: onnxruntime==1.1.1 xgboost.version: '>=1.0' + Python37-170-RT130: + python.version: '3.7' + ONNX_PATH: 
onnx==1.7.0 + ONNXRT_PATH: onnxruntime==1.3.0 + xgboost.version: '>=1.0' maxParallel: 3 steps: diff --git a/.azure-pipelines/win32-CI-nightly.yml b/.azure-pipelines/win32-CI-nightly.yml index 524b196b..fafce296 100644 --- a/.azure-pipelines/win32-CI-nightly.yml +++ b/.azure-pipelines/win32-CI-nightly.yml @@ -15,7 +15,7 @@ jobs: matrix: Python36-nightly: python.version: '3.6' - ONNX_PATH: onnx==1.6.0 + ONNX_PATH: onnx==1.7.0 ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly COREML_PATH: git+https://github.com/apple/coremltools@3.1 maxParallel: 3 diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index 08e90b59..2dbac7df 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -41,6 +41,13 @@ jobs: COREML_PATH: git+https://github.com/apple/coremltools@3.1 sklearn.version: '' + Python37-170-RT130: + python.version: '3.7' + ONNX_PATH: onnx==1.7.0 + ONNXRT_PATH: onnxruntime==1.3.0 + COREML_PATH: git+https://github.com/apple/coremltools@3.1 + sklearn.version: '' + maxParallel: 3 steps: diff --git a/onnxmltools/convert/main.py b/onnxmltools/convert/main.py index 53dfec8b..7037ee76 100644 --- a/onnxmltools/convert/main.py +++ b/onnxmltools/convert/main.py @@ -44,27 +44,15 @@ def convert_libsvm(model, name=None, initial_types=None, doc_string='', target_o custom_conversion_functions, custom_shape_calculators) -def convert_catboost(model, name=None, initial_types=None, doc_string='', target_opset=None, - targeted_onnx=onnx.__version__, custom_conversion_functions=None, custom_shape_calculators=None): +def convert_catboost(model, name=None, initial_types=None, doc_string='', target_opset=None): try: from catboost.utils import convert_to_onnx_object except ImportError: - raise RuntimeError('CatBoost is not installed or need to be updated. ' + raise RuntimeError('CatBoost is not installed or needs to be updated. 
' 'Please install/upgrade CatBoost to use this feature.') - if custom_conversion_functions: - warnings.warn('custom_conversion_functions is not supported. Please set it to None.') - if custom_shape_calculators: - warnings.warn('custom_shape_calculators is not supported. Please set it to None.') - - export_parameters = { - 'onnx_domain': 'ai.catboost', - 'onnx_model_version': 0, - 'onnx_doc_string': doc_string, - 'onnx_graph_name': name - } - - return convert_to_onnx_object(model, export_parameters=export_parameters) + return convert_to_onnx_object(model, export_parameters={'onnx_doc_string': doc_string, 'onnx_graph_name': name}, + initial_types=initial_types, target_opset=target_opset) def convert_lightgbm(model, name=None, initial_types=None, doc_string='', target_opset=None, diff --git a/onnxmltools/utils/tests_helper.py b/onnxmltools/utils/tests_helper.py index 02f05a96..b57bc807 100644 --- a/onnxmltools/utils/tests_helper.py +++ b/onnxmltools/utils/tests_helper.py @@ -212,9 +212,9 @@ def convert_model(model, name, input_types): model, prefix = convert_lightgbm(model, name, input_types), "LightGbm" else: raise RuntimeError("Unable to convert model of type '{0}'.".format(type(model))) - elif model.__class__.__name__.startswith("Cat"): + elif model.__class__.__name__.startswith("CatBoost"): from onnxmltools.convert import convert_catboost - model, prefix = convert_catboost(model, name, input_types), "Cat" + model, prefix = convert_catboost(model, name, input_types), "CatBoost" elif isinstance(model, BaseEstimator): from onnxmltools.convert import convert_sklearn model, prefix = convert_sklearn(model, name, input_types), "Sklearn" diff --git a/requirements-dev.txt b/requirements-dev.txt index dda73f23..aed1c68b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -14,5 +14,5 @@ scikit-learn scipy svm wheel -xgboost +xgboost<=1.0.2 catboost diff --git a/tests/catboost/test_CatBoost_converter.py b/tests/catboost/test_CatBoost_converter.py index 
76f6c419..56f8f632 100644 --- a/tests/catboost/test_CatBoost_converter.py +++ b/tests/catboost/test_CatBoost_converter.py @@ -6,7 +6,8 @@ import catboost from sklearn.datasets import make_regression, make_classification from onnxmltools.convert import convert_catboost -from onnxmltools.utils import dump_data_and_model, dump_single_regression, dump_multiple_classification +from onnxmltools.utils import dump_data_and_model, dump_single_regression, \ + dump_multiple_classification, dump_binary_classification class TestCatBoost(unittest.TestCase): @@ -26,14 +27,13 @@ def test_catboost_bin_classifier(self): X, y = make_classification(n_samples=100, n_features=4, random_state=0) catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='CrossEntropy', n_estimators=10, verbose=0) - + dump_binary_classification(catboost_model) catboost_model.fit(X.astype(numpy.float32), y) catboost_onnx = convert_catboost(catboost_model, name='CatBoostBinClassification', doc_string='test binary classification') self.assertTrue(catboost_onnx is not None) - # onnx runtime returns zeros as class labels - # dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostBinClass") + dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostBinClass") def test_catboost_multi_classifier(self): X, y = make_classification(n_samples=10, n_informative=8, n_classes=3, random_state=0) From d9359ecf12f75ee5390ba5e31aeb8fdd629f19c8 Mon Sep 17 00:00:00 2001 From: monkey0head Date: Wed, 27 May 2020 06:05:57 -0700 Subject: [PATCH 4/4] binclass fix --- tests/catboost/test_CatBoost_converter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/catboost/test_CatBoost_converter.py b/tests/catboost/test_CatBoost_converter.py index 56f8f632..ec2a40a4 100644 --- a/tests/catboost/test_CatBoost_converter.py +++ b/tests/catboost/test_CatBoost_converter.py @@ -6,8 +6,7 @@ import catboost from sklearn.datasets 
import make_regression, make_classification from onnxmltools.convert import convert_catboost -from onnxmltools.utils import dump_data_and_model, dump_single_regression, \ - dump_multiple_classification, dump_binary_classification +from onnxmltools.utils import dump_data_and_model, dump_single_regression, dump_multiple_classification class TestCatBoost(unittest.TestCase): @@ -27,7 +26,6 @@ def test_catboost_bin_classifier(self): X, y = make_classification(n_samples=100, n_features=4, random_state=0) catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='CrossEntropy', n_estimators=10, verbose=0) - dump_binary_classification(catboost_model) catboost_model.fit(X.astype(numpy.float32), y) catboost_onnx = convert_catboost(catboost_model, name='CatBoostBinClassification',