From c9ca134314190d115bebc2e68aa3a6cde8155b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 22 May 2020 17:53:37 +0200 Subject: [PATCH 01/17] Checks that issue #197 is fixed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- tests/test_sklearn_tfidf_vectorizer_converter_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py index 008238596..9b966d65b 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py @@ -23,7 +23,6 @@ def test_tfidf_20newsgroups(self): X, y = np.array(data.data)[:100], np.array(data.target)[:100] X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.5, random_state=42) - model = TfidfVectorizer().fit(X_train) onnx_model = convert_sklearn( model, 'cv', [('input', StringTensorType(X_test.shape))]) From de0b079383affeea611204fec3ae6cc741b0371b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 22 May 2020 17:58:35 +0200 Subject: [PATCH 02/17] Update test_sklearn_tfidf_vectorizer_converter_dataset.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- ...sklearn_tfidf_vectorizer_converter_dataset.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py index 9b966d65b..9b85ef256 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py @@ -23,6 +23,7 @@ def test_tfidf_20newsgroups(self): X, y = np.array(data.data)[:100], np.array(data.target)[:100] X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.5, random_state=42) + model = TfidfVectorizer().fit(X_train) onnx_model = convert_sklearn( model, 'cv', [('input', StringTensorType(X_test.shape))]) @@ -50,6 +51,21 @@ def test_tfidf_20newsgroups_nolowercase(self): allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.4.0')") + def test_tfidf_20newsgroups_char(self): + data = fetch_20newsgroups() + X, y = np.array(data.data)[:100], np.array(data.target)[:100] + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.5, random_state=42) + + model = TfidfVectorizer(analyzer='char').fit(X_train) + onnx_model = convert_sklearn( + model, 'cv', [('input', StringTensorType(X_test.shape))]) + dump_data_and_model( + X_test, model, onnx_model, + basename="SklearnTfidfVectorizer20newsgroupsChar", + allow_failure="StrictVersion(onnxruntime.__version__)" + " <= StrictVersion('0.4.0')") + if __name__ == "__main__": unittest.main() From 9e964e779b14baf84bb12fb132bf53a450ba431c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 25 Aug 2020 18:20:51 +0200 Subject: [PATCH 03/17] Fixes #524, HistGradientBoosting 0.24 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/common/tree_ensemble.py | 5 ++++- tests/test_sklearn_random_forest_converters.py | 11 ++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/skl2onnx/common/tree_ensemble.py b/skl2onnx/common/tree_ensemble.py index ebce08679..46a115aef 100644 --- a/skl2onnx/common/tree_ensemble.py +++ 
b/skl2onnx/common/tree_ensemble.py @@ -208,7 +208,10 @@ def add_tree_to_attribute_pairs_hist_gradient_boosting( else: mode = 'BRANCH_LEQ' feat_id = node['feature_idx'] - threshold = node['threshold'] + try: + threshold = node['threshold'] + except ValueError as e: + threshold = node['num_threshold'] left_child_id = node['left'] right_child_id = node['right'] missing = node['missing_go_to_left'] diff --git a/tests/test_sklearn_random_forest_converters.py b/tests/test_sklearn_random_forest_converters.py index cc41831f2..d19b6cf48 100644 --- a/tests/test_sklearn_random_forest_converters.py +++ b/tests/test_sklearn_random_forest_converters.py @@ -654,14 +654,15 @@ def test_extratreesregressor_decision_path(self): reason="Requires ONNX-ML extension.") @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") def test_randomforestclassifier_decision_path(self): - model = RandomForestClassifier(max_depth=2, n_estimators=2) + model = RandomForestClassifier(max_depth=2, n_estimators=3) X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) initial_types = [('input', FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( model, initial_types=initial_types, - options={id(model): {'decision_path': True, 'zipmap': False}}) + options={id(model): {'decision_path': True, 'zipmap': False}}, + target_opset=TARGET_OPSET) sess = InferenceSession(model_onnx.SerializeToString()) res = sess.run(None, {'input': X.astype(numpy.float32)}) pred = model.predict(X) @@ -677,14 +678,15 @@ def test_randomforestclassifier_decision_path(self): reason="Requires ONNX-ML extension.") @unittest.skipIf(TARGET_OPSET < 12, reason="LabelEncoder") def test_extratreesclassifier_decision_path(self): - model = ExtraTreesClassifier(max_depth=2, n_estimators=2) + model = ExtraTreesClassifier(max_depth=2, n_estimators=3) X, y = make_classification(10, n_features=4, random_state=42) X = X[:, :2] model.fit(X, y) initial_types = [('input', FloatTensorType((None, X.shape[1])))] model_onnx = convert_sklearn( model, initial_types=initial_types, - options={id(model): {'decision_path': True, 'zipmap': False}}) + options={id(model): {'decision_path': True, 'zipmap': False}}, + target_opset=TARGET_OPSET) sess = InferenceSession(model_onnx.SerializeToString()) res = sess.run(None, {'input': X.astype(numpy.float32)}) pred = model.predict(X) @@ -698,5 +700,4 @@ def test_extratreesclassifier_decision_path(self): if __name__ == "__main__": - # TestSklearnTreeEnsembleModels().test_randomforestclassifier_decision_path() unittest.main() From 6d7cc9109e24760fdcae5be991f98589ff28130c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 25 Aug 2020 19:00:25 +0200 Subject: [PATCH 04/17] Update test_sklearn_tfidf_vectorizer_converter_dataset.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- ..._sklearn_tfidf_vectorizer_converter_dataset.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py index 9b85ef256..008238596 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_dataset.py @@ -51,21 +51,6 @@ def test_tfidf_20newsgroups_nolowercase(self): allow_failure="StrictVersion(onnxruntime.__version__)" " <= StrictVersion('0.4.0')") - def test_tfidf_20newsgroups_char(self): - data = fetch_20newsgroups() - X, y = 
np.array(data.data)[:100], np.array(data.target)[:100] - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.5, random_state=42) - - model = TfidfVectorizer(analyzer='char').fit(X_train) - onnx_model = convert_sklearn( - model, 'cv', [('input', StringTensorType(X_test.shape))]) - dump_data_and_model( - X_test, model, onnx_model, - basename="SklearnTfidfVectorizer20newsgroupsChar", - allow_failure="StrictVersion(onnxruntime.__version__)" - " <= StrictVersion('0.4.0')") - if __name__ == "__main__": unittest.main() From 486f1919a709b8d96de3f9ff2d9ea1d17971c63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 26 Aug 2020 11:25:39 +0200 Subject: [PATCH 05/17] Update tree_ensemble.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/common/tree_ensemble.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skl2onnx/common/tree_ensemble.py b/skl2onnx/common/tree_ensemble.py index 46a115aef..cffdbdffa 100644 --- a/skl2onnx/common/tree_ensemble.py +++ b/skl2onnx/common/tree_ensemble.py @@ -210,7 +210,7 @@ def add_tree_to_attribute_pairs_hist_gradient_boosting( feat_id = node['feature_idx'] try: threshold = node['threshold'] - except ValueError as e: + except ValueError: threshold = node['num_threshold'] left_child_id = node['left'] right_child_id = node['right'] From d210231592b1ec7fccd9887d8ce13db63262760c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Fri, 28 Aug 2020 18:37:09 +0200 Subject: [PATCH 06/17] Fixes nightly build (#527) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix nightly build Signed-off-by: xavier dupré --- tests/test_sklearn_nearest_neighbour_converter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index b9a0ee8a1..2af29f9ac 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -430,7 +430,7 @@ def test_model_knn_classifier_binary_class(self): @unittest.skipIf( StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), reason="not available") - @unittest.skipIf(onnx_opset_version() < TARGET_OPSET, + @unittest.skipIf(onnx_opset_version() < 12, reason="needs higher target_opset") def test_model_knn_classifier_binary_class_radius(self): model, X = self._fit_model_binary_classification( @@ -470,6 +470,8 @@ def test_model_knn_classifier_multi_class(self): @unittest.skipIf( StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), reason="not available") + @unittest.skipIf(onnx_opset_version() < 12, + reason="needs higher target_opset") def test_model_knn_classifier_multi_class_radius(self): model, X = self._fit_model_multiclass_classification( RadiusNeighborsClassifier()) From e5c4213d2da57b57750c3ae5788988bab1c9849a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Thu, 3 Sep 2020 19:31:12 +0200 Subject: [PATCH 07/17] Add complete tutorial to the documentation (#517) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add full tutorial to the documentation Signed-off-by: xavier dupré --- .azure-pipelines/linux-conda-CI.yml | 20 +- docs/conf.py | 107 +- docs/exts/github_link.py | 82 ++ docs/index.rst | 3 +- docs/index_tutorial.rst | 44 + docs/{tutorial.rst => introduction.rst} | 6 +- docs/requirements.txt | 32 + 
docs/tests/test_documentation_examples.py | 9 +- docs/tests/test_documentation_tutorial.py | 81 ++ docs/tests/test_utils_benchmark.py | 28 + docs/tests/test_utils_classes.py | 18 + docs/tutorial/README.txt | 2 + docs/tutorial/plot_abegin_convert_pipeline.py | 124 ++ docs/tutorial/plot_bbegin_measure_time.py | 137 +++ docs/tutorial/plot_cbegin_opset.py | 122 ++ docs/tutorial/plot_dbegin_options.py | 250 ++++ docs/tutorial/plot_dbegin_options_list.py | 119 ++ docs/tutorial/plot_ebegin_float_double.py | 358 ++++++ docs/tutorial/plot_fbegin_investigate.py | 122 ++ docs/tutorial/plot_gbegin_dataframe.py | 183 +++ .../tutorial/plot_gbegin_transfer_learning.py | 248 ++++ docs/tutorial/plot_gexternal_lightgbm.py | 108 ++ docs/tutorial/plot_gexternal_xgboost.py | 194 ++++ docs/tutorial/plot_icustom_converter.py | 240 ++++ docs/tutorial/plot_jcustom_syntax.py | 178 +++ .../plot_kcustom_converter_wrapper.py | 203 ++++ docs/tutorial/plot_lcustom_options.py | 204 ++++ docs/tutorial/plot_mcustom_parser.py | 191 +++ docs/tutorial/plot_pextend_python_runtime.py | 401 +++++++ docs/tutorial/plot_qextend_onnxruntime.py | 19 + docs/tutorial_1-5_external.rst | 15 + docs/tutorial_1_simple.rst | 25 + docs/tutorial_2_new_converter.rst | 38 + docs/tutorial_3_new_operator.rst | 23 + skl2onnx/tutorial/__init__.py | 5 + skl2onnx/tutorial/benchmark.py | 43 + skl2onnx/tutorial/imagenet_classes.py | 1031 +++++++++++++++++ 37 files changed, 4993 insertions(+), 20 deletions(-) create mode 100644 docs/exts/github_link.py create mode 100644 docs/index_tutorial.rst rename docs/{tutorial.rst => introduction.rst} (99%) create mode 100644 docs/requirements.txt create mode 100644 docs/tests/test_documentation_tutorial.py create mode 100644 docs/tests/test_utils_benchmark.py create mode 100644 docs/tests/test_utils_classes.py create mode 100644 docs/tutorial/README.txt create mode 100644 docs/tutorial/plot_abegin_convert_pipeline.py create mode 100644 docs/tutorial/plot_bbegin_measure_time.py create mode 100644 docs/tutorial/plot_cbegin_opset.py create mode 100644 docs/tutorial/plot_dbegin_options.py create mode 100644 docs/tutorial/plot_dbegin_options_list.py create mode 100644 docs/tutorial/plot_ebegin_float_double.py create mode 100644 docs/tutorial/plot_fbegin_investigate.py create mode 100644 docs/tutorial/plot_gbegin_dataframe.py create mode 100644 docs/tutorial/plot_gbegin_transfer_learning.py create mode 100644 docs/tutorial/plot_gexternal_lightgbm.py create mode 100644 docs/tutorial/plot_gexternal_xgboost.py create mode 100644 docs/tutorial/plot_icustom_converter.py create mode 100644 docs/tutorial/plot_jcustom_syntax.py create mode 100644 docs/tutorial/plot_kcustom_converter_wrapper.py create mode 100644 docs/tutorial/plot_lcustom_options.py create mode 100644 docs/tutorial/plot_mcustom_parser.py create mode 100644 docs/tutorial/plot_pextend_python_runtime.py create mode 100644 docs/tutorial/plot_qextend_onnxruntime.py create mode 100644 docs/tutorial_1-5_external.rst create mode 100644 docs/tutorial_1_simple.rst create mode 100644 docs/tutorial_2_new_converter.rst create mode 100644 docs/tutorial_3_new_operator.rst create mode 100644 skl2onnx/tutorial/__init__.py create mode 100644 skl2onnx/tutorial/benchmark.py create mode 100644 skl2onnx/tutorial/imagenet_classes.py diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index 9ffaf1925..7fdca64c5 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -241,11 +241,6 @@ jobs: pip install --no-deps 
onnxmltools # git+https://github.com/onnx/onnxmltools.git displayName: 'install onnxmltools' - - script: | - pip install matplotlib pydot - displayName: 'install modules for examples' - condition: eq(variables['run.example'], '1') - # Check flake8 after the tests to get more feedback. # It is checked before the tests on the windows build. - script: | @@ -271,10 +266,24 @@ jobs: then export TEST_TARGET_OPSET="$(onnx.target_opset)" fi + pip install -r docs/requirements.txt + pip uninstall -y skl2onnx + python setup.py install pytest docs/tests --durations=0 --basetemp=temp --doctest-modules displayName: 'run documentation examples' condition: eq(variables['run.example'], '1') + # dot cannot be found but is missing to build the documentation + #- script: | + # if [ '$(onnx.target_opset)' != '' ] + # then + # export TEST_TARGET_OPSET="$(onnx.target_opset)" + # fi + # conda install -c anaconda graphviz + # python -m sphinx -j2 -v -N -T -b html -d build/doctrees docs dist/html + # displayName: 'build documentation' + # condition: eq(variables['run.example'], '1') + - script: | if [ '$(onnx.target_opset)' != '' ] then @@ -285,6 +294,7 @@ jobs: python tests/benchmark.py fi displayName: 'benchmark' + condition: eq(variables['run.example'], '0') - task: PublishTestResults@2 inputs: diff --git a/docs/conf.py b/docs/conf.py index 7d9c6351c..3d00e2017 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,17 +7,19 @@ import os import sys +import warnings import skl2onnx import sphinx_readable_theme import tabulate sys.path.append(os.path.abspath('exts')) +from github_link import make_linkcode_resolve # -- Project information ----------------------------------------------------- project = 'sklearn-onnx' -copyright = '2018-2019, Microsoft' +copyright = '2018-2020, Microsoft' author = 'Microsoft' version = skl2onnx.__version__ release = version @@ -35,6 +37,15 @@ 'sphinx.ext.autodoc', 'sphinx.ext.graphviz', 'sphinx_skl2onnx_extension', + 'matplotlib.sphinxext.plot_directive', + 'pyquickhelper.sphinxext.sphinx_cmdref_extension', + 'pyquickhelper.sphinxext.sphinx_collapse_extension', + 'pyquickhelper.sphinxext.sphinx_docassert_extension', + 'pyquickhelper.sphinxext.sphinx_epkg_extension', + 'pyquickhelper.sphinxext.sphinx_exref_extension', + 'pyquickhelper.sphinxext.sphinx_faqref_extension', + 'pyquickhelper.sphinxext.sphinx_gdot_extension', + 'pyquickhelper.sphinxext.sphinx_runpython_extension', "sphinxcontrib.blockdiag", ] @@ -65,11 +76,101 @@ # -- Options for Sphinx Gallery ---------------------------------------------- +linkcode_resolve = make_linkcode_resolve( + 'skl2onnx', + 'https://github.com/onnx/skl2onnx/blob/{revision}/' + '{package}/{path}#L{lineno}') + +intersphinx_mapping = { + 'joblib': ('https://joblib.readthedocs.io/en/latest/', None), + 'python': ('https://docs.python.org/{.major}'.format( + sys.version_info), None), + 'matplotlib': ('https://matplotlib.org/', None), + 'mlinsights': ( + 'http://www.xavierdupre.fr/app/mlinsights/helpsphinx/', None), + 'mlprodict': ( + 'http://www.xavierdupre.fr/app/mlprodict/helpsphinx/', None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), + 'pyquickhelper': ( + 'http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/', None), + 'onnxmltools': ( + 'http://www.xavierdupre.fr/app/onnxmltools/helpsphinx/index.html', + None), + 'onnxruntime': ( + 'http://www.xavierdupre.fr/app/onnxruntime/helpsphinx/index.html', + None), + 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/reference', None), + 
'seaborn': ('https://seaborn.pydata.org/', None), + 'scikit-learn': ( + 'https://scikit-learn.org/stable/', + None), + 'sklearn': ( + 'https://scikit-learn.org/stable/', + None), + 'skl2onnx': ( + 'http://www.xavierdupre.fr/app/sklearn-onnx/helpsphinx/index.html', + None), + 'sklearn-onnx': ( + 'http://www.xavierdupre.fr/app/sklearn-onnx/helpsphinx/index.html', + None), +} + sphinx_gallery_conf = { - 'examples_dirs': 'examples', - 'gallery_dirs': 'auto_examples', + 'examples_dirs': ['examples', 'tutorial'], + 'gallery_dirs': ['auto_examples', 'auto_tutorial'], + 'capture_repr': ('_repr_html_', '__repr__'), + 'ignore_repr_types': r'matplotlib.text|matplotlib.axes', + 'binder': { + 'org': 'microsoft', + 'repo': 'skl2onnx', + 'binderhub_url': 'https://mybinder.org', + 'branch': 'master', + 'dependencies': os.path.abspath( + os.path.join(os.path.dirname(__file__), 'requirements.txt')), + 'use_jupyter_lab': True + }, } +epkg_dictionary = { + 'C': 'https://en.wikipedia.org/wiki/C_(programming_language)', + 'C++': 'https://en.wikipedia.org/wiki/C%2B%2B', + 'cython': 'https://cython.org/', + 'DOT': 'https://www.graphviz.org/doc/info/lang.html', + 'ImageNet': 'http://www.image-net.org/', + 'LightGBM': 'https://lightgbm.readthedocs.io/en/latest/', + 'lightgbm': 'https://lightgbm.readthedocs.io/en/latest/', + 'mlprodict': + 'http://www.xavierdupre.fr/app/mlprodict/helpsphinx/index.html', + 'NMF': + 'https://scikit-learn.org/stable/modules/generated/' + 'sklearn.decomposition.NMF.html', + 'numpy': 'https://numpy.org/', + 'onnx': 'https://github.com/onnx/onnx', + 'ONNX': 'https://onnx.ai/', + 'ONNX operators': + 'https://github.com/onnx/onnx/blob/master/docs/Operators.md', + 'ONNX ML operators': + 'https://github.com/onnx/onnx/blob/master/docs/Operators-ml.md', + 'onnxmltools': 'https://github.com/onnx/onnxmltools', + 'OnnxPipeline': + 'http://www.xavierdupre.fr/app/mlprodict/helpsphinx/mlprodict/' + 'sklapi/onnx_pipeline.html?highlight=onnxpipeline', + 'onnxruntime': 'https://microsoft.github.io/onnxruntime/', + 'openmp': 'https://en.wikipedia.org/wiki/OpenMP', + 'pyinstrument': 'https://github.com/joerick/pyinstrument', + 'python': 'https://www.python.org/', + 'pytorch': 'https://pytorch.org/', + 'scikit-learn': 'https://scikit-learn.org/stable/', + 'skorch': 'https://skorch.readthedocs.io/en/stable/', + 'sklearn-onnx': 'https://github.com/onnx/sklearn-onnx', + 'sphinx-gallery': 'https://github.com/sphinx-gallery/sphinx-gallery', + 'xgboost': 'https://xgboost.readthedocs.io/en/latest/', + 'XGBoost': 'https://xgboost.readthedocs.io/en/latest/', +} + +warnings.filterwarnings("ignore", category=FutureWarning) + # -- Setup actions ----------------------------------------------------------- def setup(app): diff --git a/docs/exts/github_link.py b/docs/exts/github_link.py new file mode 100644 index 000000000..b5d02d9e3 --- /dev/null +++ b/docs/exts/github_link.py @@ -0,0 +1,82 @@ +# Source: https://github.com/scikit-learn/scikit-learn/blob/ +# master/doc/sphinxext/github_link.py +from operator import attrgetter +import inspect +import subprocess +import os +import sys +from functools import partial + +REVISION_CMD = 'git rev-parse --short HEAD' + + +def _get_git_revision(): + try: + revision = subprocess.check_output(REVISION_CMD.split()).strip() + except (subprocess.CalledProcessError, OSError): + print('Failed to execute git to get revision') + return None + return revision.decode('utf-8') + + +def _linkcode_resolve(domain, info, package, url_fmt, revision): + """Determine a link to online source for a 
class/method/function + This is called by sphinx.ext.linkcode + An example with a long-untouched module that everyone has + >>> _linkcode_resolve('py', {'module': 'tty', + ... 'fullname': 'setraw'}, + ... package='tty', + ... url_fmt='http://hg.python.org/cpython/file/' + ... '{revision}/Lib/{package}/{path}#L{lineno}', + ... revision='xxxx') + 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' + """ + + if revision is None: + return + if domain not in ('py', 'pyx'): + return + if not info.get('module') or not info.get('fullname'): + return + + class_name = info['fullname'].split('.')[0] + module = __import__(info['module'], fromlist=[class_name]) + obj = attrgetter(info['fullname'])(module) + + # Unwrap the object to get the correct source + # file in case that is wrapped by a decorator + obj = inspect.unwrap(obj) + + try: + fn = inspect.getsourcefile(obj) + except Exception: + fn = None + if not fn: + try: + fn = inspect.getsourcefile(sys.modules[obj.__module__]) + except Exception: + fn = None + if not fn: + return + + fn = os.path.relpath(fn, + start=os.path.dirname(__import__(package).__file__)) + try: + lineno = inspect.getsourcelines(obj)[1] + except Exception: + lineno = '' + return url_fmt.format(revision=revision, package=package, + path=fn, lineno=lineno) + + +def make_linkcode_resolve(package, url_fmt): + """Returns a linkcode_resolve function for the given URL format + revision is a git commit reference (hash or name) + package is the name of the root module of the package + url_fmt is along the lines of ('https://github.com/USER/PROJECT/' + 'blob/{revision}/{package}/' + '{path}#L{lineno}') + """ + revision = _get_git_revision() + return partial(_linkcode_resolve, revision=revision, package=package, + url_fmt=url_fmt) diff --git a/docs/index.rst b/docs/index.rst index 9f4467927..6c363f0ba 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,7 +20,8 @@ toolkits into `ONNX `_. .. toctree:: :maxdepth: 1 - tutorial + introduction + index_tutorial api_summary auto_examples/index pipeline diff --git a/docs/index_tutorial.rst b/docs/index_tutorial.rst new file mode 100644 index 000000000..e7f14cde2 --- /dev/null +++ b/docs/index_tutorial.rst @@ -0,0 +1,44 @@ + +Tutorial +======== + +.. index:: tutorial + +The tutorial goes from a simple example which +converts a pipeline to a more complex example +involving operator not actually implemented in +:epkg:`ONNX operators` or :epkg:`ONNX ML operators`. + +.. toctree:: + :maxdepth: 2 + + tutorial_1_simple + tutorial_1-5_external + tutorial_2_new_converter + tutorial_3_new_operator + +The tutorial was tested with following version: + +.. runpython:: + :showcode: + + import numpy + import scipy + import sklearn + import lightgbm + import onnx + import onnxmltools + import onnxruntime + import xgboost + import skl2onnx + import mlprodict + import pyquickhelper + + mods = [numpy, scipy, sklearn, lightgbm, xgboost, + onnx, onnxmltools, onnxruntime, + skl2onnx, mlprodict, pyquickhelper] + mods = [(m.__name__, m.__version__) for m in mods] + mx = max(len(_[0]) for _ in mods) + 1 + for name, vers in sorted(mods): + print("%s%s%s" % (name, " " * (mx - len(name)), vers)) + diff --git a/docs/tutorial.rst b/docs/introduction.rst similarity index 99% rename from docs/tutorial.rst rename to docs/introduction.rst index 362dc882a..8609d531b 100644 --- a/docs/tutorial.rst +++ b/docs/introduction.rst @@ -1,7 +1,7 @@ -======== -Tutorial -======== +============ +Introduction +============ .. 
contents:: :local: diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..515de852f --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,32 @@ +autopep8 +codecov +coverage +flake8 +joblib +lightgbm +loky +matplotlib +mlinsights>=0.2.508 +mlprodict>=0.4.1259 +nbsphinx +onnx +onnxruntime +pillow +py-spy +pandas +pydot +pyinstrument +pyquickhelper>=1.9.3359 +pytest +pytest-cov +scikit-learn>=0.23 +skl2onnx +sphinx +sphinx_readable_theme +sphinxcontrib-blockdiag +sphinxcontrib.imagesvg +sphinx-gallery +tabulate +tqdm +wheel +xgboost diff --git a/docs/tests/test_documentation_examples.py b/docs/tests/test_documentation_examples.py index 1b0a3e154..07b7b4a4c 100644 --- a/docs/tests/test_documentation_examples.py +++ b/docs/tests/test_documentation_examples.py @@ -7,6 +7,7 @@ import sys import importlib import subprocess +import numpy import onnxruntime @@ -59,14 +60,6 @@ def test_documentation_examples(self): # dot not installed, this part # is tested in onnx framework pass - elif "No module named 'xgboost'" in st: - # xgboost not installed on CI - pass - elif ("cannot import name 'LightGbmModelContainer' " - "from 'onnxmltools.convert.common." - "_container'") in st: - # onnxmltools not recent enough - pass elif ('Please fix either the inputs or ' 'the model.') in st: # onnxruntime datasets changed in master branch, diff --git a/docs/tests/test_documentation_tutorial.py b/docs/tests/test_documentation_tutorial.py new file mode 100644 index 000000000..ba3d09f10 --- /dev/null +++ b/docs/tests/test_documentation_tutorial.py @@ -0,0 +1,81 @@ +""" +Tests examples from the documentation. +""" +import unittest +from distutils.version import StrictVersion +import os +import sys +import importlib +import subprocess +import numpy +import onnxruntime + + +def import_source(module_file_path, module_name): + if not os.path.exists(module_file_path): + raise FileNotFoundError(module_file_path) + module_spec = importlib.util.spec_from_file_location( + module_name, module_file_path) + if module_spec is None: + raise FileNotFoundError( + "Unable to find '{}' in '{}'.".format( + module_name, module_file_path)) + module = importlib.util.module_from_spec(module_spec) + return module_spec.loader.exec_module(module) + + +class TestDocumentationTutorial(unittest.TestCase): + + def test_documentation_tutorial(self): + + this = os.path.abspath(os.path.dirname(__file__)) + fold = os.path.normpath(os.path.join(this, '..', 'tutorial')) + found = os.listdir(fold) + tested = 0 + for name in found: + if name.startswith("plot_") and name.endswith(".py"): + print("run %r" % name) + try: + mod = import_source(fold, os.path.splitext(name)[0]) + assert mod is not None + except FileNotFoundError: + # try another way + cmds = [sys.executable, "-u", + os.path.join(fold, name)] + p = subprocess.Popen( + cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + res = p.communicate() + out, err = res + st = err.decode('ascii', errors='ignore') + if len(st) > 0 and 'Traceback' in st: + if "No such file or directory: 'dot': 'dot'" in st: + # dot not installed, this part + # is tested in onnx framework + pass + elif '"dot" not found in path.' in st: + # dot not installed, this part + # is tested in onnx framework + pass + elif ("cannot import name 'LightGbmModelContainer' " + "from 'onnxmltools.convert.common." 
+ "_container'") in st: + # onnxmltools not recent enough + pass + elif ('Please fix either the inputs or ' + 'the model.') in st: + # onnxruntime datasets changed in master branch, + # still the same in released version on pypi + pass + else: + installed = os.listdir(os.path.dirname(numpy.__file__)) + raise RuntimeError( + "Example '{}' (cmd: {} - exec_prefix='{}') " + "failed due to\n{}" + "".format(name, cmds, sys.exec_prefix, st)) + tested += 1 + if tested == 0: + raise RuntimeError("No example was tested.") + + +if __name__ == "__main__": + unittest.main() diff --git a/docs/tests/test_utils_benchmark.py b/docs/tests/test_utils_benchmark.py new file mode 100644 index 000000000..2aa8d0cf5 --- /dev/null +++ b/docs/tests/test_utils_benchmark.py @@ -0,0 +1,28 @@ +""" +@brief test log(time=3s) +""" + +import unittest +import numpy +from skl2onnx.tutorial import measure_time + + +class TestMeasureTime(unittest.TestCase): + + def test_vector_count(self): + def fct(): + X = numpy.ones((1000, 5)) + return X + res = measure_time( + "fct", context={"fct": fct}, div_by_number=False, number=100) + self.assertIn("average", res) + res = measure_time( + "fct", context={"fct": fct}, div_by_number=True, number=100) + self.assertIn("average", res) + res = measure_time( + "fct", context={"fct": fct}, div_by_number=True, number=1000) + self.assertIn("average", res) + + +if __name__ == "__main__": + unittest.main() diff --git a/docs/tests/test_utils_classes.py b/docs/tests/test_utils_classes.py new file mode 100644 index 000000000..efb1f147f --- /dev/null +++ b/docs/tests/test_utils_classes.py @@ -0,0 +1,18 @@ +""" +@brief test log(time=3s) +""" + +import unittest +from skl2onnx.tutorial.imagenet_classes import class_names + + +class TestUtilsClasses(unittest.TestCase): + + def test_classes(self): + cl = class_names + self.assertIsInstance(cl, dict) + self.assertEqual(len(cl), 1000) + + +if __name__ == "__main__": + unittest.main() diff --git a/docs/tutorial/README.txt b/docs/tutorial/README.txt new file mode 100644 index 000000000..bac945d55 --- /dev/null +++ b/docs/tutorial/README.txt @@ -0,0 +1,2 @@ +Examples +======== diff --git a/docs/tutorial/plot_abegin_convert_pipeline.py b/docs/tutorial/plot_abegin_convert_pipeline.py new file mode 100644 index 000000000..1fdc16fe7 --- /dev/null +++ b/docs/tutorial/plot_abegin_convert_pipeline.py @@ -0,0 +1,124 @@ +""" +Train and deploy a scikit-learn pipeline +======================================== + +.. index:: pipeline, deployment + +This program starts from an example in :epkg:`scikit-learn` +documentation: `Plot individual and voting regression predictions +`_, +converts it into ONNX and finally computes the predictions +a different runtime. + +.. 
contents:: + :local: + + +Training a pipeline ++++++++++++++++++++ +""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +import numpy +from onnxruntime import InferenceSession +from sklearn.datasets import load_diabetes +from sklearn.ensemble import ( + GradientBoostingRegressor, RandomForestRegressor, + VotingRegressor) +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline +from skl2onnx import to_onnx +from mlprodict.onnxrt import OnnxInference + + +X, y = load_diabetes(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y) + +# Train classifiers +reg1 = GradientBoostingRegressor(random_state=1) +reg2 = RandomForestRegressor(random_state=1) +reg3 = LinearRegression() + +ereg = Pipeline(steps=[ + ('voting', VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])), +]) +ereg.fit(X_train, y_train) + +################################# +# Converts the model +# ++++++++++++++++++ +# +# The second argument gives a sample of the data +# used to train the model. It is used to infer +# the input type of the ONNX graph. It is converted +# into single float and ONNX runtimes may not fully +# support doubles. + +onx = to_onnx(ereg, X_train[:1].astype(numpy.float32)) + +################################### +# Prediction with ONNX +# ++++++++++++++++++++ +# +# The first example uses :epkg:`onnxruntime`. + +sess = InferenceSession(onx.SerializeToString()) +pred_ort = sess.run(None, {'X': X_test.astype(numpy.float32)})[0] + +pred_skl = ereg.predict(X_test.astype(numpy.float32)) + +pred_ort[:5], pred_skl[:5] + +#################################### +# .. _l-diff-dicrepencies: +# +# Comparison +# ++++++++++ +# +# Before deploying, we need to compare that both +# *scikit-learn* and *ONNX* return the same predictions. + + +def diff(p1, p2): + p1 = p1.ravel() + p2 = p2.ravel() + d = numpy.abs(p2 - p1) + return d.max(), (d / numpy.abs(p1)).max() + + +print(diff(pred_skl, pred_ort)) + +############################################ +# It looks good. Biggest errors (absolute and relative) +# are within the margin error introduced by using +# floats instead of doubles. +# We can save the model into ONNX +# format and compute the same predictions in many +# platform using :epkg:`onnxruntime`. + +#################################### +# Python runtime +# ++++++++++++++ +# +# A python runtime can be used as well to compute +# the prediction. It is not meant to be used into +# production (it still relies on python), but it is +# useful to investigate why the conversion went wrong. +# It uses module :epkg:`mlprodict`. + +oinf = OnnxInference(onx, runtime="python_compiled") +print(oinf) + +########################################## +# It works almost the same way. + +pred_pyrt = oinf.run({'X': X_test.astype(numpy.float32)})['variable'] +print(diff(pred_skl, pred_pyrt)) + +############################# +# Final graph +# +++++++++++ + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) diff --git a/docs/tutorial/plot_bbegin_measure_time.py b/docs/tutorial/plot_bbegin_measure_time.py new file mode 100644 index 000000000..c36c4ef8d --- /dev/null +++ b/docs/tutorial/plot_bbegin_measure_time.py @@ -0,0 +1,137 @@ +""" +Benchmark ONNX conversion +========================= + +.. index:: benchmark + +Example :ref:`l-simple-deploy-1` converts a simple model. 
+This example takes a similar example but on random data +and compares the processing time required by each option +to compute predictions. + +.. contents:: + :local: + + +Training a pipeline ++++++++++++++++++++ +""" +import numpy +from pandas import DataFrame +from tqdm import tqdm +from sklearn import config_context +from sklearn.datasets import make_regression +from sklearn.ensemble import ( + GradientBoostingRegressor, RandomForestRegressor, + VotingRegressor) +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split +from mlprodict.onnxrt import OnnxInference +from onnxruntime import InferenceSession +from skl2onnx import to_onnx +from skl2onnx.tutorial import measure_time + + +N = 11000 +X, y = make_regression(N, n_features=10) +X_train, X_test, y_train, y_test = train_test_split( + X, y, train_size=0.01) +print("Train shape", X_train.shape) +print("Test shape", X_test.shape) + +reg1 = GradientBoostingRegressor(random_state=1) +reg2 = RandomForestRegressor(random_state=1) +reg3 = LinearRegression() +ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)]) +ereg.fit(X_train, y_train) + +################################# +# Measure the processing time +# +++++++++++++++++++++++++++ +# +# We use function :func:`skl2onnx.tutorial.measure_time`. +# The page about `assume_finite `_ +# may be useful if you need to optimize the prediction. +# We measure the processing time per observation whether +# or not an observation belongs to a batch or is a single one. + +sizes = [(1, 50), (10, 50), (1000, 10), (10000, 5)] + +with config_context(assume_finite=True): + obs = [] + for batch_size, repeat in tqdm(sizes): + context = {"ereg": ereg, 'X': X_test[:batch_size]} + mt = measure_time( + "ereg.predict(X)", context, div_by_number=True, + number=10, repeat=repeat) + mt['size'] = context['X'].shape[0] + mt['mean_obs'] = mt['average'] / mt['size'] + obs.append(mt) + +df_skl = DataFrame(obs) +df_skl + +##################################### +# Graphe. + +df_skl.set_index('size')[['mean_obs']].plot( + title="scikit-learn", logx=True, logy=True) + +############################### +# ONNX runtime +# ++++++++++++ +# +# The same is done with the two ONNX runtime +# available. + +onx = to_onnx(ereg, X_train[:1].astype(numpy.float32)) +sess = InferenceSession(onx.SerializeToString()) +oinf = OnnxInference(onx, runtime="python_compiled") + +obs = [] +for batch_size, repeat in tqdm(sizes): + + # scikit-learn + context = {"ereg": ereg, 'X': X_test[:batch_size].astype(numpy.float32)} + mt = measure_time( + "ereg.predict(X)", context, div_by_number=True, + number=10, repeat=repeat) + mt['size'] = context['X'].shape[0] + mt['skl'] = mt['average'] / mt['size'] + + # onnxruntime + context = {"sess": sess, 'X': X_test[:batch_size].astype(numpy.float32)} + mt2 = measure_time( + "sess.run(None, {'X': X})[0]", context, div_by_number=True, + number=10, repeat=repeat) + mt['ort'] = mt2['average'] / mt['size'] + + # mlprodict + context = {"oinf": oinf, 'X': X_test[:batch_size].astype(numpy.float32)} + mt2 = measure_time( + "oinf.run({'X': X})['variable']", context, div_by_number=True, + number=10, repeat=repeat) + mt['pyrt'] = mt2['average'] / mt['size'] + + # end + obs.append(mt) + + +df = DataFrame(obs) +df + +##################################### +# Graph. 
+ +df.set_index('size')[['skl', 'ort', 'pyrt']].plot( + title="Average prediction time per runtime", + logx=True, logy=True) + +##################################### +# :epkg:`ONNX` runtimes are much faster than :epkg:`scikit-learn` +# to predict one observation. :epkg:`scikit-learn` is optimized +# for training, for batch prediction. That explains why +# :epkg:`scikit-learn` and ONNX runtimes seem to converge +# for big batches. They use similar implementation, +# parallelization and languages (:epkg:`C++`, :epkg:`openmp`). diff --git a/docs/tutorial/plot_cbegin_opset.py b/docs/tutorial/plot_cbegin_opset.py new file mode 100644 index 000000000..7bc0d82b5 --- /dev/null +++ b/docs/tutorial/plot_cbegin_opset.py @@ -0,0 +1,122 @@ +""" +What is the opset number? +========================= + +.. index:: opset, target opset, version + +Every library is versioned. :epkg:`scikit-learn` may change +the implementation of a specific model. That happens +for example with the `SVC `_ model where +the parameter *break_ties* was added in 0.22. :epkg:`ONNX` +does also have a version called *opset number*. +Operator *ArgMin* was added in opset 1 and changed in opset +11, 12, 13. Sometimes, it is updated to extend the list +of types it supports, sometimes, it moves a parameter +into the input list. The runtime used to deploy the model +does not implement a new version, in that case, a model +must be converted by usually using the most recent opset +supported by the runtime, we call that opset the +*targeted opset*. An ONNX graph only contains +one unique opset, every node must be described following +the specifications defined by the latest opset below the +targeted opset. + +This example considers an `IsolationForest +`_ and digs into opsets. + +.. contents:: + :local: + +Data +++++ + +A simple example. +""" +from onnx.defs import onnx_opset_version +from skl2onnx import to_onnx +import numpy +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +from sklearn.datasets import make_blobs + +X, y = make_blobs(n_samples=100, n_features=2) + +model = IsolationForest(3) +model.fit(X) +labels = model.predict(X) + +fig, ax = plt.subplots(1, 1) +for k in (0, 1): + ax.plot(X[labels == k, 0], X[labels == k, 1], 'o', label="cl%d" % k) +ax.set_title("Sample") + +####################################### +# ONNX +# ++++ + + +onx = to_onnx(model, X[:1].astype(numpy.float32)) +print(onx) + +########################## +# The last line shows the opsets. +# Let's extract it. + +domains = onx.opset_import +for dom in domains: + print("domain: %r, version: %r" % (dom.domain, dom.version)) + +################################### +# There are two opsets, one for standard operators, +# the other for machine learning operators. + +######################################## +# ONNX and opset +# ++++++++++++++ +# +# The converter can convert a model to an older opset +# than the default one, from 1 to the last available one. 
+
+
+def get_domain_opset(onx):
+    domains = onx.opset_import
+    res = [{'domain': dom.domain, 'version': dom.version}
+           for dom in domains]
+    return {d['domain']: d['version'] for d in res}
+
+
+for opset in range(1, onnx_opset_version() + 1):
+    try:
+        onx = to_onnx(model, X[:1].astype(numpy.float32), target_opset=opset)
+    except RuntimeError as e:
+        print('target: %r error: %r' % (opset, e))
+        continue
+    nodes = len(onx.graph.node)
+    print('target: %r --> %s %d' % (opset, get_domain_opset(onx), nodes))
+
+########################################
+# It shows that the model cannot be converted for opset
+# below 5. Operator `Reshape `_ changed in
+# opset 5: a parameter became an input. The converter
+# does not support *opset < 5* because runtimes usually do not.
+#
+# Other opsets
+# ++++++++++++
+#
+# The previous example changed the opset of the main domain
+# ``''`` but the other opset domain can be changed as well.
+
+for opset in range(9, onnx_opset_version() + 1):
+    for opset_ml in range(1, 3):
+        tops = {'': opset, 'ai.onnx.ml': opset_ml}
+        try:
+            onx = to_onnx(
+                model, X[:1].astype(numpy.float32), target_opset=tops)
+        except RuntimeError as e:
+            print('target: %r error: %r' % (opset, e))
+            continue
+        nodes = len(onx.graph.node)
+        print('target: %r --> %s %d' % (opset, get_domain_opset(onx), nodes))
diff --git a/docs/tutorial/plot_dbegin_options.py b/docs/tutorial/plot_dbegin_options.py
new file mode 100644
index 000000000..17a96421e
--- /dev/null
+++ b/docs/tutorial/plot_dbegin_options.py
@@ -0,0 +1,250 @@
+"""
+One model, many possible conversions with options
+=================================================
+
+.. index:: options
+
+There is not one way to convert a model. A new operator
+might have been added in a newer version of :epkg:`ONNX`
+and that speeds up the converted model. The rational choice
+would be to use this new operator, but that assumes the associated
+runtime has an implementation for it. What if two different
+users need two different conversions for the same model?
+Let's see how this may be done.
+
+.. contents::
+    :local:
+
+
+Option *zipmap*
++++++++++++++++
+
+Every classifier is by design converted into an ONNX graph which outputs
+two results: the predicted label and the prediction probabilities
+for every label. By default, the labels are integers and the
+probabilities are stored in dictionaries. That's the purpose
+of operator *ZipMap* added at the end of the following graph.
+
+.. gdot::
+    :script: DOT-SECTION
+
+    import numpy
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.linear_model import LogisticRegression
+    from skl2onnx import to_onnx
+    from mlprodict.onnxrt import OnnxInference
+
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X_train, _, y_train, __ = train_test_split(X, y, random_state=11)
+    clr = LogisticRegression()
+    clr.fit(X_train, y_train)
+
+    model_def = to_onnx(clr, X_train.astype(numpy.float32))
+    oinf = OnnxInference(model_def)
+    print("DOT-SECTION", oinf.to_dot())
+
+This operator is not really efficient as it copies every probability and
+label into a different container. This extra time is usually significant for
+small classifiers, so it makes sense to remove it.
+
+..
gdot:: + :script: DOT-SECTION + + import numpy + from sklearn.datasets import load_iris + from sklearn.model_selection import train_test_split + from sklearn.linear_model import LogisticRegression + from skl2onnx import to_onnx + from mlprodict.onnxrt import OnnxInference + + iris = load_iris() + X, y = iris.data, iris.target + X_train, _, y_train, __ = train_test_split(X, y, random_state=11) + clr = LogisticRegression() + clr.fit(X_train, y_train) + + model_def = to_onnx(clr, X_train.astype(numpy.float32), + options={LogisticRegression: {'zipmap': False}}) + oinf = OnnxInference(model_def) + print("DOT-SECTION", oinf.to_dot()) + +There might be in the graph many classifiers, it is important to have +a way to specify which classifier should keep its *ZipMap* +and which is not. So it is possible to specify options by id. +""" + +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from pprint import pformat +from skl2onnx.common._registration import _converter_pool +from sklearn.preprocessing import MinMaxScaler +from sklearn.pipeline import Pipeline +import numpy +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression +from skl2onnx import to_onnx +from mlprodict.onnxrt import OnnxInference + +iris = load_iris() +X, y = iris.data, iris.target +X_train, _, y_train, __ = train_test_split(X, y, random_state=11) +clr = LogisticRegression() +clr.fit(X_train, y_train) + +model_def = to_onnx(clr, X_train.astype(numpy.float32), + options={id(clr): {'zipmap': False}}) +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf) + +################################## +# Visually. + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +########################################## +# We need to compare that kind of visualisation to +# what it would give with operator *ZipMap*. + +model_def = to_onnx(clr, X_train.astype(numpy.float32)) +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf) + +################################## +# Visually. + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +####################################### +# Using function *id* has one flaw: it is not pickable. +# It is just better to use strings. + +model_def = to_onnx(clr, X_train.astype(numpy.float32), + options={'zipmap': False}) +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf) + + +################################## +# Visually. + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +####################################### +# Option in a pipeline +# ++++++++++++++++++++ +# +# In a pipeline, :epkg:`sklearn-onnx` uses the same +# name convention. + + +pipe = Pipeline([ + ('norm', MinMaxScaler()), + ('clr', LogisticRegression()) +]) +pipe.fit(X_train, y_train) + +model_def = to_onnx(pipe, X_train.astype(numpy.float32), + options={'clr__zipmap': False}) +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf) + +################################## +# Visually. + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +####################################### +# Option *raw_scores* +# +++++++++++++++++++ +# +# Every classifier is converted in a graph which +# returns probabilities by default. But many models +# compute unscaled *raw_scores*. 
+# First, with probabilities: + + +pipe = Pipeline([ + ('norm', MinMaxScaler()), + ('clr', LogisticRegression()) +]) +pipe.fit(X_train, y_train) + +model_def = to_onnx( + pipe, X_train.astype(numpy.float32), + options={id(pipe): {'zipmap': False}}) + +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf.run({'X': X.astype(numpy.float32)[:5]})) + + +####################################### +# Then with raw scores: + +model_def = to_onnx( + pipe, X_train.astype(numpy.float32), + options={id(pipe): {'raw_scores': True, 'zipmap': False}}) + +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf.run({'X': X.astype(numpy.float32)[:5]})) + + +######################################### +# It did not seem to work... We need to tell +# that applies on a specific part of the pipeline +# and not the whole pipeline. + +model_def = to_onnx( + pipe, X_train.astype(numpy.float32), + options={id(pipe.steps[1][1]): {'raw_scores': True, 'zipmap': False}}) + +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf.run({'X': X.astype(numpy.float32)[:5]})) + +########################################### +# There are negative values. That works. +# Strings are still easier to use. + +model_def = to_onnx( + pipe, X_train.astype(numpy.float32), + options={'clr__raw_scores': True, 'clr__zipmap': False}) + +oinf = OnnxInference(model_def, runtime='python_compiled') +print(oinf.run({'X': X.astype(numpy.float32)[:5]})) + + +######################################### +# Negative figures. We still have raw scores. + +############################################################ +# List of available options +# +++++++++++++++++++++++++ +# +# Options are registered for every converted to detect any +# supported options while running the conversion. + + +all_opts = set() +for k, v in sorted(_converter_pool.items()): + opts = v.get_allowed_options() + if not isinstance(opts, dict): + continue + name = k.replace('Sklearn', '') + print('%s%s %r' % (name, " " * (30 - len(name)), opts)) + for o in opts: + all_opts.add(o) + +print('all options:', pformat(list(sorted(all_opts)))) diff --git a/docs/tutorial/plot_dbegin_options_list.py b/docs/tutorial/plot_dbegin_options_list.py new file mode 100644 index 000000000..2972e4cff --- /dev/null +++ b/docs/tutorial/plot_dbegin_options_list.py @@ -0,0 +1,119 @@ +""" +Black list operators when converting +==================================== + +.. index:: black list, white list + +Some runtimes do not implement a runtime for every +available operator in ONNX. The converter does not know +that but it is possible to black some operators. Most of +the converters do not change their behaviour, they fail +if they use a black listed operator, a couple of them +produces a different ONNX graph. + +.. contents:: + :local: + +GaussianMixture ++++++++++++++++ + +The first converter to change its behaviour depending on a black list +of operators is for model *GaussianMixture*. 
+""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +from timeit import timeit +import numpy +from onnxruntime import InferenceSession +from sklearn.mixture import GaussianMixture +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from skl2onnx import to_onnx + +data = load_iris() +X_train, X_test = train_test_split(data.data) +model = GaussianMixture() +model.fit(X_train) + +################################### +# Default conversion +# ++++++++++++++++++ + +model_onnx = to_onnx( + model, X_train[:1].astype(numpy.float32), + options={id(model): {'score_samples': True}}, + target_opset=12) +sess = InferenceSession(model_onnx.SerializeToString()) + +xt = X_test[:5].astype(numpy.float32) +print(model.score_samples(xt)) +print(sess.run(None, {'X': xt})[2]) + + +################################## +# Display the ONNX graph. + + +oinf = OnnxInference(model_onnx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + +################################### +# Conversion without ReduceLogSumExp +# ++++++++++++++++++++++++++++++++++ +# +# Parameter *black_op* is used to tell the converter +# not to use this operator. Let's see what the converter +# produces in that case. + +model_onnx2 = to_onnx( + model, X_train[:1].astype(numpy.float32), + options={id(model): {'score_samples': True}}, + black_op={'ReduceLogSumExp'}, + target_opset=12) +sess2 = InferenceSession(model_onnx2.SerializeToString()) + +xt = X_test[:5].astype(numpy.float32) +print(model.score_samples(xt)) +print(sess2.run(None, {'X': xt})[2]) + +################################## +# Display the ONNX graph. + +oinf = OnnxInference(model_onnx2) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +####################################### +# Processing time +# +++++++++++++++ + +print(timeit(stmt="sess.run(None, {'X': xt})", + number=10000, globals={'sess': sess, 'xt': xt})) + +print(timeit(stmt="sess2.run(None, {'X': xt})", + number=10000, globals={'sess2': sess2, 'xt': xt})) + +################################# +# The model using ReduceLogSumExp is much faster. + +########################################## +# If the converter cannot convert without... +# ++++++++++++++++++++++++++++++++++++++++++ +# +# Many converters do not consider the white and black lists +# of operators. If a converter fails to convert without using +# a blacklisted operator (or only whitelisted operators), +# *skl2onnx* raises an error. + +try: + to_onnx( + model, X_train[:1].astype(numpy.float32), + options={id(model): {'score_samples': True}}, + black_op={'ReduceLogSumExp', 'Add'}, + target_opset=12) +except RuntimeError as e: + print('Error:', e) diff --git a/docs/tutorial/plot_ebegin_float_double.py b/docs/tutorial/plot_ebegin_float_double.py new file mode 100644 index 000000000..0d6790b36 --- /dev/null +++ b/docs/tutorial/plot_ebegin_float_double.py @@ -0,0 +1,358 @@ +""" +.. _l-example-discrepencies-float-double: + +Issues when switching to float +============================== + +.. index:: float, double, discrepencies + +Most models in :epkg:`scikit-learn` do computation with double, +not float. Most models in deep learning use float because +that's the most common situation with GPU. ONNX was initially +created to facilitate the deployment of deep learning models +and that explains why many converters assume the converted models +should use float. 
That assumption does not usually harm
+the predictions; the conversion to float introduces small
+discrepancies compared to double predictions.
+That assumption is usually true if the prediction
+function is continuous, :math:`y = f(x)`, then
+:math:`dy = f'(x) dx`. We can determine an upper bound
+on the discrepancies:
+:math:`\\Delta(y) \\leqslant \\sup_x \\norm{f'(x)} dx`.
+*dx* is the discrepancy introduced by a float conversion,
+``dx = x - numpy.float32(x)``.
+
+However, that's not the case for every model. A decision tree
+trained for a regression is not a continuous function. Therefore,
+even a small *dx* may introduce a huge discrepancy. Let's look into
+an example which always produces discrepancies and some ways
+to overcome this situation.
+
+.. contents::
+    :local:
+
+More into the issue
++++++++++++++++++++
+
+The example below is built to fail.
+It contains integer features with different orders
+of magnitude rounded to integers. A decision tree compares
+features to thresholds. In most cases, float and double
+comparisons give the same result. We denote
+:math:`[x]_{f32}` the conversion (or cast)
+``numpy.float32(x)``.
+
+.. math::
+
+    x \\leqslant y = [x]_{f32} \\leqslant [y]_{f32}
+
+However, the probability that both comparisons give
+different results is not zero. The following graph shows
+the discord areas.
+"""
+from mlprodict.sklapi import OnnxPipeline
+from skl2onnx.sklapi import CastTransformer, CastRegressor
+from skl2onnx import to_onnx
+from mlprodict.onnx_conv import to_onnx as to_onnx_extended
+from mlprodict.onnxrt import OnnxInference
+from onnxruntime import InferenceSession
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import Pipeline
+from sklearn.datasets import make_regression
+import numpy
+import matplotlib.pyplot as plt
+
+
+def area_mismatch_rule(N, delta, factor, rule=None):
+    if rule is None:
+        def rule(t): return numpy.float32(t)
+    xst = []
+    yst = []
+    xsf = []
+    ysf = []
+    for x in range(-N, N):
+        for y in range(-N, N):
+            dx = (1. + x * delta) * factor
+            dy = (1. + y * delta) * factor
+            c1 = 1 if numpy.float64(dx) <= numpy.float64(dy) else 0
+            c2 = 1 if numpy.float32(dx) <= rule(dy) else 0
+            key = abs(c1 - c2)
+            if key == 1:
+                xsf.append(dx)
+                ysf.append(dy)
+            else:
+                xst.append(dx)
+                yst.append(dy)
+    return xst, yst, xsf, ysf
+
+
+delta = 36e-10
+factor = 1
+xst, yst, xsf, ysf = area_mismatch_rule(100, delta, factor)
+
+
+fig, ax = plt.subplots(1, 1, figsize=(5, 5))
+ax.plot(xst, yst, '.', label="agree")
+ax.plot(xsf, ysf, '.', label="disagree")
+ax.set_title("Region where x <= y and (float)x <= (float)y agree")
+ax.set_xlabel("x")
+ax.set_ylabel("y")
+ax.plot([min(xst), max(xst)], [min(yst), max(yst)], 'k--')
+ax.legend()
+
+
+#####################################
+# The pipeline and the data
+# +++++++++++++++++++++++++
+#
+# We can now build an example where the learned decision tree
+# does many comparisons in this discord area. This is done
+# by rounding features to integers, a frequent case
+# when dealing with categorical features.
+
+
+X, y = make_regression(10000, 10)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+
+Xi_train, yi_train = X_train.copy(), y_train.copy()
+Xi_test, yi_test = X_test.copy(), y_test.copy()
+for i in range(X.shape[1]):
+    Xi_train[:, i] = (Xi_train[:, i] * 2 ** i).astype(numpy.int64)
+    Xi_test[:, i] = (Xi_test[:, i] * 2 ** i).astype(numpy.int64)
+
+max_depth = 10
+
+model = Pipeline([
+    ('scaler', StandardScaler()),
+    ('dt', DecisionTreeRegressor(max_depth=max_depth))
+])
+
+model.fit(Xi_train, yi_train)
+
+#######################################
+# The discrepancies
+# +++++++++++++++++
+#
+# Let's reuse the function implemented in the
+# first example :ref:`l-diff-dicrepencies` and
+# look into the conversion.
+
+
+def diff(p1, p2):
+    p1 = p1.ravel()
+    p2 = p2.ravel()
+    d = numpy.abs(p2 - p1)
+    return d.max(), (d / numpy.abs(p1)).max()
+
+
+onx = to_onnx(model, Xi_train[:1].astype(numpy.float32))
+
+sess = InferenceSession(onx.SerializeToString())
+
+X32 = Xi_test.astype(numpy.float32)
+
+skl = model.predict(X32)
+ort = sess.run(None, {'X': X32})[0]
+
+print(diff(skl, ort))
+
+###################################
+# The discrepancies are significant.
+# The ONNX model keeps float at every step.
+#
+# .. blockdiag::
+#
+#    diagram {
+#      x_float32 -> normalizer -> y_float32 -> dtree -> z_float32
+#    }
+#
+# In :epkg:`scikit-learn`:
+#
+# .. blockdiag::
+#
+#    diagram {
+#      x_float32 -> normalizer -> y_double -> dtree -> z_double
+#    }
+#
+# CastTransformer
+# +++++++++++++++
+#
+# We could try to use double everywhere. Unfortunately,
+# :epkg:`ONNX ML Operators` only allow float coefficients
+# for the operator *TreeEnsembleRegressor*. We may want
+# to compromise by casting the output of the normalizer into
+# float in the :epkg:`scikit-learn` pipeline.
+#
+# .. blockdiag::
+#
+#    diagram {
+#      x_float32 -> normalizer -> y_double ->
+#      cast -> y_float -> dtree -> z_float
+#    }
+#
+
+
+model2 = Pipeline([
+    ('scaler', StandardScaler()),
+    ('cast', CastTransformer()),
+    ('dt', DecisionTreeRegressor(max_depth=max_depth))
+])
+
+model2.fit(Xi_train, yi_train)
+
+##########################################
+# The discrepancies.
+
+onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32))
+
+sess2 = InferenceSession(onx2.SerializeToString())
+
+skl2 = model2.predict(X32)
+ort2 = sess2.run(None, {'X': X32})[0]
+
+print(diff(skl2, ort2))
+
+######################################
+# That still fails because the normalizer
+# in :epkg:`scikit-learn` and in :epkg:`ONNX`
+# use different types. The cast still happens and
+# the *dx* is still here. To remove it, we need to use
+# double in the ONNX normalizer.
+
+model3 = Pipeline([
+    ('cast64', CastTransformer(dtype=numpy.float64)),
+    ('scaler', StandardScaler()),
+    ('cast', CastTransformer()),
+    ('dt', DecisionTreeRegressor(max_depth=max_depth))
+])
+
+model3.fit(Xi_train, yi_train)
+onx3 = to_onnx(model3, Xi_train[:1].astype(numpy.float32),
+               options={StandardScaler: {'div': 'div_cast'}})
+
+sess3 = InferenceSession(onx3.SerializeToString())
+
+skl3 = model3.predict(X32)
+ort3 = sess3.run(None, {'X': X32})[0]
+
+print(diff(skl3, ort3))
+
+#################################
+# It works. That also means that it is difficult to change
+# the computation type when a pipeline includes a discontinuous
+# function. It is better to keep the same types all along
+# before using a decision tree.
+#
+# Sledgehammer
+# ++++++++++++
+#
+# The idea here is to always train the next step based
+# on ONNX outputs. That way, every step of the pipeline
+# is trained based on ONNX outputs.
+#
+# * Trains the first step.
+# * Converts the step into ONNX.
+# * Computes ONNX outputs.
+# * Trains the second step on these outputs.
+# * Converts the second step into ONNX.
+# * Merges it with the first step.
+# * Computes ONNX outputs of the merged first two steps.
+# * ...
+#
+# It is implemented in
+# class :epkg:`OnnxPipeline`.
+
+
+model_onx = OnnxPipeline([
+    ('scaler', StandardScaler()),
+    ('dt', DecisionTreeRegressor(max_depth=max_depth))
+])
+
+model_onx.fit(Xi_train, yi_train)
+
+#############################################
+# The conversion.
+
+onx4 = to_onnx(model_onx, Xi_train[:1].astype(numpy.float32))
+
+sess4 = InferenceSession(onx4.SerializeToString())
+
+skl4 = model_onx.predict(X32)
+ort4 = sess4.run(None, {'X': X32})[0]
+
+print(diff(skl4, ort4))
+
+#################################
+# It works too, in a simpler way.
+
+########################################
+# No discrepancies at all?
+# ++++++++++++++++++++++++
+#
+# Is it possible to get no error at all?
+# There is one major obstacle: :epkg:`scikit-learn`
+# stores the predicted values in every leaf with double
+# (`_tree.pyx - _get_value_ndarray
+# `_), while :epkg:`ONNX` defines
+# the predicted values as floats: :epkg:`TreeEnsembleRegressor`.
+# What can we do to solve it?
+# What if we could extend the ONNX specification to support
+# double instead of floats?
+# We reuse what was developed in the example
+# `Other way to convert `_
+# and a custom ONNX node `TreeEnsembleRegressorDouble
+# `_.
+
+
+tree = DecisionTreeRegressor(max_depth=max_depth)
+tree.fit(Xi_train, yi_train)
+
+model_onx = to_onnx_extended(tree, Xi_train[:1].astype(numpy.float64),
+                             rewrite_ops=True)
+
+oinf5 = OnnxInference(model_onx, runtime='python_compiled')
+print(oinf5)
+
+##########################################
+# Let's measure the discrepancies.
+
+X64 = Xi_test.astype(numpy.float64)
+skl5 = tree.predict(X64)
+ort5 = oinf5.run({'X': X64})['variable']
+
+############################################
+# Perfect, no discrepancies at all.
+
+print(diff(skl5, ort5))
+
+##############################################
+# CastRegressor
+# +++++++++++++
+#
+# The previous example demonstrated that the type difference for
+# the predicted values explains the small differences between
+# :epkg:`scikit-learn` and :epkg:`onnxruntime`, but it does not work
+# with the current ONNX specification. Another option is to cast
+# the predictions into floats in the :epkg:`scikit-learn` pipeline.
+
+
+ctree = CastRegressor(DecisionTreeRegressor(max_depth=max_depth))
+ctree.fit(Xi_train, yi_train)
+
+onx6 = to_onnx(ctree, Xi_train[:1].astype(numpy.float32))
+
+sess6 = InferenceSession(onx6.SerializeToString())
+
+skl6 = ctree.predict(X32)
+ort6 = sess6.run(None, {'X': X32})[0]
+
+print(diff(skl6, ort6))
+
+##############################
+# Success!
diff --git a/docs/tutorial/plot_fbegin_investigate.py b/docs/tutorial/plot_fbegin_investigate.py
new file mode 100644
index 000000000..ccc239df9
--- /dev/null
+++ b/docs/tutorial/plot_fbegin_investigate.py
@@ -0,0 +1,122 @@
+"""
+Intermediate results and investigation
+======================================
+
+.. index:: investigate, intermediate results
+
+There are many reasons why a user wants more than just running
+the model converted into ONNX. Intermediate results may be
+needed: the output of every node in the graph. The ONNX graph
+may need to be altered to remove some nodes.
+Transfer learning usually means removing the last layers of
+a deep neural network.
Another reaason is debugging. +It often happens that the runtime fails to compute the predictions +due to a shape mismatch. Then it is useful the get the shape +of every intermediate result. This example looks into two +ways of doing it. + +.. contents:: + :local: + +Look into pipeline steps +++++++++++++++++++++++++ + +The first way is a tricky one: it overloads +methods *transform*, *predict* and *predict_proba* +to keep a copy of inputs and outputs. It then goes +through every step of the pipeline. If the pipeline +has *n* steps, it converts the pipeline with step 1, +then the pipeline with steps 1, 2, then 1, 2, 3... +""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +import numpy +from onnxruntime import InferenceSession +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.cluster import KMeans +from sklearn.datasets import load_iris +from skl2onnx import to_onnx +from skl2onnx.helpers import collect_intermediate_steps +from skl2onnx.common.data_types import FloatTensorType + +########################### +# The pipeline. + +data = load_iris() +X = data.data + +pipe = Pipeline(steps=[ + ('std', StandardScaler()), + ('km', KMeans(3)) +]) +pipe.fit(X) + +################################# +# The function goes through every step, +# overloads the methods *transform* and +# returns an ONNX graph for every step. +steps = collect_intermediate_steps( + pipe, "pipeline", + [("X", FloatTensorType([None, X.shape[1]]))]) + +##################################### +# We call method transform to population the +# cache the overloaded methods *transform* keeps. +pipe.transform(X) + +####################################### +# We compute every step and compare +# ONNX and scikit-learn outputs. + +for step in steps: + print('----------------------------') + print(step['model']) + onnx_step = step['onnx_step'] + sess = InferenceSession(onnx_step.SerializeToString()) + onnx_outputs = sess.run(None, {'X': X.astype(numpy.float32)}) + onnx_output = onnx_outputs[-1] + skl_outputs = step['model']._debug.outputs['transform'] + + # comparison + diff = numpy.abs(skl_outputs.ravel() - onnx_output.ravel()).max() + print("difference", diff) + +# That was the first way: dynamically overwrite +# every method transform or predict in a scikit-learn +# pipeline to capture the input and output of every step, +# compare them to the output produced by truncated ONNX +# graphs built from the first one. +# +##################################### +# Python runtime to look into every node +# ++++++++++++++++++++++++++++++++++++++ +# +# The python runtime may be useful to easily look +# into every node of the ONNX graph. +# This option can be used to check when the computation +# fails due to nan values or a dimension mismatch. + + +onx = to_onnx(pipe, X[:1].astype(numpy.float32)) + +oinf = OnnxInference(onx) +oinf.run({'X': X[:2].astype(numpy.float32)}, + verbose=1, fLOG=print) + +################################### +# And to get a sense of the intermediate results. + +oinf.run({'X': X[:2].astype(numpy.float32)}, + verbose=3, fLOG=print) + +# This way is usually better if you need to investigate +# issues within the code of the runtime for an operator. 
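+
+#####################################
+# As a small aside (not part of the original example), the names of the
+# intermediate results can also be read directly from the ONNX graph,
+# without running it.
+
+for node in onx.graph.node:
+    print(node.op_type, list(node.input), '->', list(node.output))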
+# +################################# +# Final graph +# +++++++++++ + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) diff --git a/docs/tutorial/plot_gbegin_dataframe.py b/docs/tutorial/plot_gbegin_dataframe.py new file mode 100644 index 000000000..d77f49557 --- /dev/null +++ b/docs/tutorial/plot_gbegin_dataframe.py @@ -0,0 +1,183 @@ +""" +Dataframe as an input +===================== + +.. index:: dataframe + +A pipeline usually ingests data as a matrix. It may be converted in a matrix +if all the data share the same type. But data held in a dataframe +have usually multiple types, float, integer or string for categories. +ONNX also supports that case. + +.. contents:: + :local: + +A dataset with categories ++++++++++++++++++++++++++ + +""" +from mlinsights.plotting import pipeline2dot +import numpy +import pprint +from mlprodict.onnx_conv import guess_schema_from_data +from onnxruntime import InferenceSession +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +from mlprodict.onnx_conv import to_onnx as to_onnx_ext +from skl2onnx import to_onnx +from pandas import DataFrame +from sklearn.pipeline import Pipeline +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.ensemble import RandomForestClassifier + + +data = DataFrame([ + dict(CAT1='a', CAT2='c', num1=0.5, num2=0.6, y=0), + dict(CAT1='b', CAT2='d', num1=0.4, num2=0.8, y=1), + dict(CAT1='a', CAT2='d', num1=0.5, num2=0.56, y=0), + dict(CAT1='a', CAT2='d', num1=0.55, num2=0.56, y=1), + dict(CAT1='a', CAT2='c', num1=0.35, num2=0.86, y=0), + dict(CAT1='a', CAT2='c', num1=0.5, num2=0.68, y=1), +]) + +cat_cols = ['CAT1', 'CAT2'] +train_data = data.drop('y', axis=1) + + +categorical_transformer = Pipeline([ + ('onehot', OneHotEncoder(sparse=False, handle_unknown='ignore'))]) +preprocessor = ColumnTransformer( + transformers=[ + ('cat', categorical_transformer, cat_cols)], + remainder='passthrough') +pipe = Pipeline([('preprocess', preprocessor), + ('rf', RandomForestClassifier())]) +pipe.fit(train_data, data['y']) + +##################################### +# Display. + +dot = pipeline2dot(pipe, train_data) +ax = plot_graphviz(dot) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + +####################################### +# Conversion to ONNX +# ++++++++++++++++++ +# +# Function *to_onnx* does not handle dataframes. + + +try: + onx = to_onnx(pipe, train_data[:1]) +except NotImplementedError as e: + print(e) + +################################### +# But it possible to use an extended one. + + +onx = to_onnx_ext( + pipe, train_data[:1], + options={RandomForestClassifier: {'zipmap': False}}) + +####################################### +# Graph +# +++++ + + +oinf = OnnxInference(onx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +################################# +# Prediction with ONNX +# ++++++++++++++++++++ +# +# *onnxruntime* does not support dataframes. + + +sess = InferenceSession(onx.SerializeToString()) +try: + sess.run(None, train_data) +except Exception as e: + print(e) + +########################### +# Let's use a shortcut + +oinf = OnnxInference(onx) +got = oinf.run(train_data) +print(pipe.predict(train_data)) +print(got['label']) + +################################# +# And probilities. 
+ +print(pipe.predict_proba(train_data)) +print(got['probabilities']) + +###################################### +# It looks ok. Let's dig into the details to +# directly use *onnxruntime*. +# +# Unhide conversion logic with a dataframe +# ++++++++++++++++++++++++++++++++++++++++ +# +# A dataframe can be seen as a set of columns with +# different types. That's what ONNX should see: +# a list of inputs, the input name is the column name, +# the input type is the column type. + + +init = guess_schema_from_data(train_data) + +pprint.pprint(init) + +############################### +# Let's use float instead. + + +for c in train_data.columns: + if c not in cat_cols: + train_data[c] = train_data[c].astype(numpy.float32) + +init = guess_schema_from_data(train_data) +pprint.pprint(init) + +############################## +# Let's convert with *skl2onnx* only. + +onx2 = to_onnx( + pipe, initial_types=init, + options={RandomForestClassifier: {'zipmap': False}}) + +##################################### +# Let's run it with onnxruntime. +# We need to convert the dataframe into a dictionary +# where column names become keys, and column values become +# values. + +inputs = {c: train_data[c].values.reshape((-1, 1)) + for c in train_data.columns} +pprint.pprint(inputs) + +############################# +# Inference. + +sess2 = InferenceSession(onx2.SerializeToString()) + +got2 = sess2.run(None, inputs) + +print(pipe.predict(train_data)) +print(got2[0]) + +################################# +# And probilities. + +print(pipe.predict_proba(train_data)) +print(got2[1]) diff --git a/docs/tutorial/plot_gbegin_transfer_learning.py b/docs/tutorial/plot_gbegin_transfer_learning.py new file mode 100644 index 000000000..7607ea80a --- /dev/null +++ b/docs/tutorial/plot_gbegin_transfer_learning.py @@ -0,0 +1,248 @@ +""" +Transfer Learning with ONNX +=========================== + +.. index:: transfer learning, deep learning + +Transfer learning is common with deep learning. +A deep learning model is used as preprocessing before +the output is sent to a final classifier or regressor. +It is not quite easy in this case to mix framework, +:epkg:`scikit-learn` with :epkg:`pytorch` +(or :epkg:`skorch`), the Keras API for Tensorflow, +`tf.keras.wrappers.scikit_learn +`_. Every combination +requires work. ONNX reduces the number of platforms to +support. Once the model is converted into ONNX, +it can be inserted in any :epkg:`scikit-learn` pipeline. + +.. contents:: + :local: + +Retrieve and load a model ++++++++++++++++++++++++++ + +We download one model from the :epkg:`ONNX Zoo` but the model +could be trained and produced by another converter library. +""" +from io import BytesIO +import onnx +from mlprodict.sklapi import OnnxTransformer +from sklearn.decomposition import PCA +from sklearn.pipeline import Pipeline +from mlinsights.plotting.gallery import plot_gallery_images +import matplotlib.pyplot as plt +from skl2onnx.tutorial.imagenet_classes import class_names +import numpy +from PIL import Image +from onnxruntime import InferenceSession +import os +import urllib.request + + +def download_file(url, name, min_size): + if not os.path.exists(name): + print("download '%s'" % url) + with urllib.request.urlopen(url) as u: + content = u.read() + if len(content) < min_size: + raise RuntimeError( + "Unable to download '{}' due to\n{}".format( + url, content)) + print("downloaded %d bytes." 
% len(content)) + with open(name, "wb") as f: + f.write(content) + else: + print("'%s' already downloaded" % name) + + +model_name = "squeezenet1.1-7.onnx" +url_name = ("https://github.com/onnx/models/raw/master/vision/" + "classification/squeezenet/model") +url_name += "/" + model_name +download_file(url_name, model_name, 100000) + + +################################################ +# Loading the ONNX file and use it on one image. + +sess = InferenceSession(model_name) + +for inp in sess.get_inputs(): + print(inp) + +##################################### +# The model expects a series of images of size +# `[3, 224, 224]`. + +########################################## +# Classifying an image +# ++++++++++++++++++++ + +url = ("https://upload.wikimedia.org/wikipedia/commons/d/d2/" + "East_Coker_elm%2C_2.jpg") +img = "East_Coker_elm.jpg" +download_file(url, img, 100000) + +im0 = Image.open(img) +im = im0.resize((224, 224)) +# im.show() + +###################################### +# Image to numpy and predection. + + +def im2array(im): + X = numpy.asarray(im) + X = X.transpose(2, 0, 1) + X = X.reshape(1, 3, 224, 224) + return X + + +X = im2array(im) +out = sess.run(None, {'data': X.astype(numpy.float32)}) +out = out[0] + +print(out[0, :5]) + +##################################### +# Interpretation + + +res = list(sorted((r, class_names[i]) for i, r in enumerate(out[0]))) +print(res[-5:]) + +########################################## +# Classifying more images +# +++++++++++++++++++++++ +# +# The initial image is rotated, +# the answer is changing. + +angles = [a * 2. for a in range(-6, 6)] +imgs = [(angle, im0.rotate(angle).resize((224, 224))) + for angle in angles] + + +def classify(imgs): + labels = [] + for angle, img in imgs: + X = im2array(img) + probs = sess.run(None, {'data': X.astype(numpy.float32)})[0] + pl = list(sorted( + ((r, class_names[i]) for i, r in enumerate(probs[0])), + reverse=True)) + labels.append((angle, pl)) + return labels + + +climgs = classify(imgs) +for angle, res in climgs: + print("angle={} - {}".format(angle, res[:5])) + + +plot_gallery_images([img[1] for img in imgs], + [img[1][0][1][:15] for img in climgs]) + +######################################### +# Transfer learning in a pipeline +# +++++++++++++++++++++++++++++++ +# +# The proposed transfer learning consists +# using a PCA to projet the probabilities +# on a graph. + + +with open(model_name, 'rb') as f: + model_bytes = f.read() + +pipe = Pipeline(steps=[ + ('deep', OnnxTransformer( + model_bytes, runtime='onnxruntime1', change_batch_size=0)), + ('pca', PCA(2)) +]) + +X_train = numpy.vstack( + [im2array(img) for _, img in imgs]).astype(numpy.float32) +pipe.fit(X_train) + +proj = pipe.transform(X_train) +print(proj) + +########################################### +# Graph for the PCA +# ----------------- + +fig, ax = plt.subplots(1, 1, figsize=(5, 5)) +ax.plot(proj[:, 0], proj[:, 1], 'o') +ax.set_title("Projection of classification probabilities") +text = ["%1.0f-%s" % (el[0], el[1][0][1]) for el in climgs] +for label, x, y in zip(text, proj[:, 0], proj[:, 1]): + ax.annotate( + label, xy=(x, y), xytext=(-10, 10), fontsize=8, + textcoords='offset points', ha='right', va='bottom', + bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5), + arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0')) + +########################################### +# Remove one layer at the end +# --------------------------- +# +# The last is often removed before the model is +# inserted in a pipeline. 
Let's see how to do that. +# First, we need the list of output for every node. + + +model_onnx = onnx.load(BytesIO(model_bytes)) +outputs = [] +for node in model_onnx.graph.node: + print(node.name, node.output) + outputs.extend(node.output) + +################################# +# We select one of the last one. + +selected = outputs[-3] +print("selected", selected) + +################################# +# And we tell *OnnxTransformer* to use that +# specific one and to flatten the output +# as the dimension is not a matrix. + + +pipe2 = Pipeline(steps=[ + ('deep', OnnxTransformer( + model_bytes, runtime='onnxruntime1', change_batch_size=0, + output_name=selected, reshape=True)), + ('pca', PCA(2)) +]) + +pipe2.fit(X_train) + +####################################### +# We check that it is different. +# The following values are the shape of the +# PCA components. The number of column is the number +# of dimensions of the outputs of the transfered +# neural network. + +print(pipe.steps[1][1].components_.shape, + pipe2.steps[1][1].components_.shape) + +####################################### +# Graph again. + +proj2 = pipe2.transform(X_train) + +fig, ax = plt.subplots(1, 1, figsize=(5, 5)) +ax.plot(proj2[:, 0], proj2[:, 1], 'o') +ax.set_title("Second projection of classification probabilities") +text = ["%1.0f-%s" % (el[0], el[1][0][1]) for el in climgs] +for label, x, y in zip(text, proj2[:, 0], proj2[:, 1]): + ax.annotate( + label, xy=(x, y), xytext=(-10, 10), fontsize=8, + textcoords='offset points', ha='right', va='bottom', + bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5), + arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0')) diff --git a/docs/tutorial/plot_gexternal_lightgbm.py b/docs/tutorial/plot_gexternal_lightgbm.py new file mode 100644 index 000000000..6fc388df6 --- /dev/null +++ b/docs/tutorial/plot_gexternal_lightgbm.py @@ -0,0 +1,108 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +""" +.. _example-lightgbm: + +Convert a pipeline with a LightGBM model +======================================== + +.. index:: LightGBM + +:epkg:`sklearn-onnx` only converts :epkg:`scikit-learn` models into *ONNX* +but many libraries implement :epkg:`scikit-learn` API so that their models +can be included in a :epkg:`scikit-learn` pipeline. This example considers +a pipeline including a :epkg:`LightGBM` model. :epkg:`sklearn-onnx` can convert +the whole pipeline as long as it knows the converter associated to +a *LGBMClassifier*. Let's see how to do it. + +.. 
contents:: + :local: + +Train a LightGBM classifier ++++++++++++++++++++++++++++ +""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +import onnxruntime as rt +from skl2onnx import convert_sklearn, update_registered_converter +from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes # noqa +from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm # noqa +from skl2onnx.common.data_types import FloatTensorType +import numpy +from sklearn.datasets import load_iris +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from lightgbm import LGBMClassifier + +data = load_iris() +X = data.data[:, :2] +y = data.target + +ind = numpy.arange(X.shape[0]) +numpy.random.shuffle(ind) +X = X[ind, :].copy() +y = y[ind].copy() + +pipe = Pipeline([('scaler', StandardScaler()), + ('lgbm', LGBMClassifier(n_estimators=3))]) +pipe.fit(X, y) + +###################################### +# Register the converter for LGBMClassifier +# +++++++++++++++++++++++++++++++++++++++++ +# +# The converter is implemented in :epkg:`onnxmltools`: +# `onnxmltools...LightGbm.py +# `_. +# and the shape calculator: +# `onnxmltools...Classifier.py +# `_. + +update_registered_converter( + LGBMClassifier, 'LightGbmLGBMClassifier', + calculate_linear_classifier_output_shapes, convert_lightgbm, + options={'nocl': [True, False], 'zipmap': [True, False]}) + +################################## +# Convert again +# +++++++++++++ + +model_onnx = convert_sklearn( + pipe, 'pipeline_lightgbm', + [('input', FloatTensorType([None, 2]))], + target_opset=12) + +# And save. +with open("pipeline_lightgbm.onnx", "wb") as f: + f.write(model_onnx.SerializeToString()) + +########################### +# Compare the predictions +# +++++++++++++++++++++++ +# +# Predictions with LightGbm. + +print("predict", pipe.predict(X[:5])) +print("predict_proba", pipe.predict_proba(X[:1])) + +########################## +# Predictions with onnxruntime. + +sess = rt.InferenceSession("pipeline_lightgbm.onnx") + +pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) +print("predict", pred_onx[0]) +print("predict_proba", pred_onx[1][:1]) + +############################# +# Final graph +# +++++++++++ + + +oinf = OnnxInference(model_onnx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) diff --git a/docs/tutorial/plot_gexternal_xgboost.py b/docs/tutorial/plot_gexternal_xgboost.py new file mode 100644 index 000000000..63e1a2c80 --- /dev/null +++ b/docs/tutorial/plot_gexternal_xgboost.py @@ -0,0 +1,194 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +""" +.. _example-xgboost: + +Convert a pipeline with a XGBoost model +======================================== + +.. index:: XGBoost + +:epkg:`sklearn-onnx` only converts :epkg:`scikit-learn` models +into :epkg:`ONNX` but many libraries implement :epkg:`scikit-learn` +API so that their models can be included in a :epkg:`scikit-learn` +pipeline. This example considers a pipeline including a :epkg:`XGBoost` +model. :epkg:`sklearn-onnx` can convert the whole pipeline as long as +it knows the converter associated to a *XGBClassifier*. Let's see +how to do it. + +.. 
contents:: + :local: + +Train a XGBoost classifier +++++++++++++++++++++++++++ +""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +import numpy +import onnxruntime as rt +from sklearn.datasets import load_iris, load_diabetes, make_classification +from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from xgboost import XGBClassifier, XGBRegressor, DMatrix, train as train_xgb +from skl2onnx.common.data_types import FloatTensorType +from skl2onnx import convert_sklearn, to_onnx, update_registered_converter +from skl2onnx.common.shape_calculator import ( + calculate_linear_classifier_output_shapes, + calculate_linear_regressor_output_shapes) +from onnxmltools.convert.xgboost.operator_converters.XGBoost import ( + convert_xgboost) +from onnxmltools.convert import convert_xgboost as convert_xgboost_booster + + +data = load_iris() +X = data.data[:, :2] +y = data.target + +ind = numpy.arange(X.shape[0]) +numpy.random.shuffle(ind) +X = X[ind, :].copy() +y = y[ind].copy() + +pipe = Pipeline([('scaler', StandardScaler()), + ('xgb', XGBClassifier(n_estimators=3))]) +pipe.fit(X, y) + +# The conversion fails but it is expected. + +try: + convert_sklearn(pipe, 'pipeline_xgboost', + [('input', FloatTensorType([None, 2]))], + target_opset=12) +except Exception as e: + print(e) + +# The error message tells no converter was found +# for :epkg:`XGBoost` models. By default, :epkg:`sklearn-onnx` +# only handles models from :epkg:`scikit-learn` but it can +# be extended to every model following :epkg:`scikit-learn` +# API as long as the module knows there exists a converter +# for every model used in a pipeline. That's why +# we need to register a converter. + +###################################### +# Register the converter for XGBClassifier +# ++++++++++++++++++++++++++++++++++++++++ +# +# The converter is implemented in :epkg:`onnxmltools`: +# `onnxmltools...XGBoost.py +# `_. +# and the shape calculator: +# `onnxmltools...Classifier.py +# `_. + +update_registered_converter( + XGBClassifier, 'XGBoostXGBClassifier', + calculate_linear_classifier_output_shapes, convert_xgboost, + options={'nocl': [True, False], 'zipmap': [True, False]}) + +################################## +# Convert again +# +++++++++++++ + +model_onnx = convert_sklearn( + pipe, 'pipeline_xgboost', + [('input', FloatTensorType([None, 2]))], + target_opset=12) + +# And save. +with open("pipeline_xgboost.onnx", "wb") as f: + f.write(model_onnx.SerializeToString()) + +########################### +# Compare the predictions +# +++++++++++++++++++++++ +# +# Predictions with XGBoost. + +print("predict", pipe.predict(X[:5])) +print("predict_proba", pipe.predict_proba(X[:1])) + +########################## +# Predictions with onnxruntime. 
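+
+# An aside, not part of the original example: the session can also be
+# created directly from the in-memory model instead of the file saved
+# above. `sess_mem` is only a name used for this illustration.
+sess_mem = rt.InferenceSession(model_onnx.SerializeToString())
+print("inputs:", [i.name for i in sess_mem.get_inputs()])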
+ +sess = rt.InferenceSession("pipeline_xgboost.onnx") +pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) +print("predict", pred_onx[0]) +print("predict_proba", pred_onx[1][:1]) + +############################# +# Final graph +# +++++++++++ + + +oinf = OnnxInference(model_onnx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +####################################### +# Same example with XGBRegressor +# ++++++++++++++++++++++++++++++ + +update_registered_converter( + XGBRegressor, 'XGBoostXGBRegressor', + calculate_linear_regressor_output_shapes, convert_xgboost) + + +data = load_diabetes() +x = data.data +y = data.target +X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5) + +pipe = Pipeline([('scaler', StandardScaler()), + ('xgb', XGBRegressor(n_estimators=3))]) +pipe.fit(X_train, y_train) + +print("predict", pipe.predict(X_test[:5])) + +############################# +# ONNX + +onx = to_onnx(pipe, X_train.astype(numpy.float32)) + +sess = rt.InferenceSession(onx.SerializeToString()) +pred_onx = sess.run(None, {"X": X_test[:5].astype(numpy.float32)}) +print("predict", pred_onx[0].ravel()) + +################################# +# Some discrepencies may appear. In that case, +# you should read :ref:`l-example-discrepencies-float-double`. + +################################################# +# Same with a Booster +# +++++++++++++++++++ +# +# A booster cannot be inserted in a pipeline. It requires +# a different conversion function because it does not +# follow :epkg:`scikit-learn` API. + +x, y = make_classification(n_classes=2, n_features=5, + n_samples=100, + random_state=42, n_informative=3) +X_train, X_test, y_train, _ = train_test_split(x, y, test_size=0.5, + random_state=42) + +dtrain = DMatrix(X_train, label=y_train) + +param = {'objective': 'multi:softmax', 'num_class': 3} +bst = train_xgb(param, dtrain, 10) + +initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))] +onx = convert_xgboost_booster(bst, "name", initial_types=initial_type) + +sess = rt.InferenceSession(onx.SerializeToString()) +input_name = sess.get_inputs()[0].name +label_name = sess.get_outputs()[0].name +pred_onx = sess.run( + [label_name], {input_name: X_test.astype(numpy.float32)})[0] +print(pred_onx) diff --git a/docs/tutorial/plot_icustom_converter.py b/docs/tutorial/plot_icustom_converter.py new file mode 100644 index 000000000..33d3f8163 --- /dev/null +++ b/docs/tutorial/plot_icustom_converter.py @@ -0,0 +1,240 @@ +""" +.. _l-plot-custom-converter: + +Implement a new converter +========================= + +.. index:: custom converter + +By default, :epkg:`sklearn-onnx` assumes that a classifier +has two outputs (label and probabilities), a regressor +has one output (prediction), a transform has one output +(the transformed data). This example assumes the model to +convert is one of them. In that case, a new converter requires +in fact two functions: + +* a shape calculator: it defines the output shape and type + based on the model and input type, +* a converter: it actually builds an ONNX graph equivalent + to the prediction function to be converted. + +This example implements both components for a new model. + +.. contents:: + :local: + +Custom model +++++++++++++ + +Let's implement a simple custom model using +:epkg:`scikit-learn` API. The model is preprocessing +which decorrelates correlated random variables. +If *X* is a matrix of features, :math:`V=\frac{1}{n}X'X` +is the covariance matrix. 
We compute :math:`X V^{1/2}`. +""" +from mlprodict.onnxrt import OnnxInference +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +import pickle +from io import BytesIO +import numpy +from numpy.testing import assert_almost_equal +from onnxruntime import InferenceSession +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.datasets import load_iris +from skl2onnx.common.data_types import guess_numpy_type +from skl2onnx import to_onnx +from skl2onnx import update_registered_converter +from skl2onnx.algebra.onnx_ops import OnnxMatMul, OnnxSub + + +class DecorrelateTransformer(TransformerMixin, BaseEstimator): + """ + Decorrelates correlated gaussian features. + + :param alpha: avoids non inversible matrices + by adding *alpha* identity matrix + + *Attributes* + + * `self.mean_`: average + * `self.coef_`: square root of the coveriance matrix + """ + + def __init__(self, alpha=0.): + BaseEstimator.__init__(self) + TransformerMixin.__init__(self) + self.alpha = alpha + + def fit(self, X, y=None, sample_weights=None): + if sample_weights is not None: + raise NotImplementedError( + "sample_weights != None is not implemented.") + self.mean_ = numpy.mean(X, axis=0, keepdims=True) + X = X - self.mean_ + V = X.T @ X / X.shape[0] + if self.alpha != 0: + V += numpy.identity(V.shape[0]) * self.alpha + L, P = numpy.linalg.eig(V) + Linv = L ** (-0.5) + diag = numpy.diag(Linv) + root = P @ diag @ P.transpose() + self.coef_ = root + return self + + def transform(self, X): + return (X - self.mean_) @ self.coef_ + + +def test_decorrelate_transformer(): + data = load_iris() + X = data.data + + dec = DecorrelateTransformer() + dec.fit(X) + pred = dec.transform(X) + cov = pred.T @ pred + cov /= cov[0, 0] + assert_almost_equal(numpy.identity(4), cov) + + dec = DecorrelateTransformer(alpha=1e-10) + dec.fit(X) + pred = dec.transform(X) + cov = pred.T @ pred + cov /= cov[0, 0] + assert_almost_equal(numpy.identity(4), cov) + + st = BytesIO() + pickle.dump(dec, st) + dec2 = pickle.load(BytesIO(st.getvalue())) + assert_almost_equal(dec.mean_, dec2.mean_) + assert_almost_equal(dec.coef_, dec2.coef_) + assert id(dec.mean_) != id(dec2.mean_) + assert id(dec.coef_) != id(dec2.coef_) + + +test_decorrelate_transformer() + +data = load_iris() +X = data.data + +dec = DecorrelateTransformer() +dec.fit(X) +pred = dec.transform(X[:5]) +print(pred) + +######################################## +# Trained coefficients. +print(dec.mean_) +print(dec.coef_) + + +############################################ +# Conversion into ONNX +# ++++++++++++++++++++ +# +# Let's try to convert it and see what happens. + + +try: + to_onnx(dec, X.astype(numpy.float32)) +except Exception as e: + print(e) + +############################ +# This error means there is no converter associated +# to *DecorrelateTransformer*. Let's implement it. +# It requires the two following +# functions, a shape calculator and a converter +# with the same signature as below. +# First the shape calculator. We retrieve the input type +# add tells the output type has the same type, +# the same number of rows and a specific number of columns. + + +def decorrelate_transformer_shape_calculator(operator): + op = operator.raw_operator + input_type = operator.inputs[0].type.__class__ + input_dim = operator.inputs[0].type.shape[0] + output_type = input_type([input_dim, op.coef_.shape[1]]) + operator.outputs[0].type = output_type + + +################################### +# The converter. One thing we need to pay attention to +# is the target opset. 
This information is important +# to make sure that every node is defined following the +# specifications of that opset. + + +def decorrelate_transformer_converter(scope, operator, container): + op = operator.raw_operator + opv = container.target_opset + out = operator.outputs + + # We retrieve the unique input. + X = operator.inputs[0] + + # In most case, computation happen in floats. + # But it might be with double. ONNX is very strict + # about types, every constant should have the same + # type as the input. + dtype = guess_numpy_type(X.type) + + # We tell in ONNX language how to compute the unique output. + # op_version=opv tells which opset is requested + Y = OnnxMatMul( + OnnxSub(X, op.mean_.astype(dtype), op_version=opv), + op.coef_.astype(dtype), + op_version=opv, output_names=out[:1]) + Y.add_to(scope, container) + + +########################################## +# We need to let *skl2onnx* know about the new converter. + + +update_registered_converter( + DecorrelateTransformer, "SklearnDecorrelateTransformer", + decorrelate_transformer_shape_calculator, + decorrelate_transformer_converter) + + +onx = to_onnx(dec, X.astype(numpy.float32)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float32)) +got = sess.run(None, {'X': X.astype(numpy.float32)})[0] + + +def diff(p1, p2): + p1 = p1.ravel() + p2 = p2.ravel() + d = numpy.abs(p2 - p1) + return d.max(), (d / numpy.abs(p1)).max() + + +print(diff(exp, got)) + +##################################### +# Let's check it works as well with double. + +onx = to_onnx(dec, X.astype(numpy.float64)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float64)) +got = sess.run(None, {'X': X.astype(numpy.float64)})[0] +print(diff(exp, got)) + +############################################# +# The differences are smaller with double as expected. + +############################# +# Final graph +# +++++++++++ + +oinf = OnnxInference(onx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) diff --git a/docs/tutorial/plot_jcustom_syntax.py b/docs/tutorial/plot_jcustom_syntax.py new file mode 100644 index 000000000..bcc10e89e --- /dev/null +++ b/docs/tutorial/plot_jcustom_syntax.py @@ -0,0 +1,178 @@ +""" +Two ways to implement a converter +================================= + +.. index:: syntax + +There are two ways to write a converter. The first one +is very verbose (see `ada_boost.py `_ +for an example). The other is less verbose and easier to understand +(see `k_means.py `_). + +The first way is used in :ref:`l-plot-custom-converter`. +This one demonstrates the second way which is usually the one +used in other converter library. It is more verbose. + +.. contents:: + :local: + + +Custom model +++++++++++++ + +It basically copies what is in example +`:ref:`l-plot-custom-converter`. +""" +from skl2onnx.common.data_types import guess_proto_type +from onnxconverter_common.onnx_ops import apply_sub +from onnxruntime import InferenceSession +from skl2onnx import update_registered_converter +from skl2onnx import to_onnx +import numpy +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.datasets import load_iris + + +class DecorrelateTransformer(TransformerMixin, BaseEstimator): + """ + Decorrelates correlated gaussian features. 
+ + :param alpha: avoids non inversible matrices + by adding *alpha* identity matrix + + *Attributes* + + * `self.mean_`: average + * `self.coef_`: square root of the coveriance matrix + """ + + def __init__(self, alpha=0.): + BaseEstimator.__init__(self) + TransformerMixin.__init__(self) + self.alpha = alpha + + def fit(self, X, y=None, sample_weights=None): + if sample_weights is not None: + raise NotImplementedError( + "sample_weights != None is not implemented.") + self.mean_ = numpy.mean(X, axis=0, keepdims=True) + X = X - self.mean_ + V = X.T @ X / X.shape[0] + if self.alpha != 0: + V += numpy.identity(V.shape[0]) * self.alpha + L, P = numpy.linalg.eig(V) + Linv = L ** (-0.5) + diag = numpy.diag(Linv) + root = P @ diag @ P.transpose() + self.coef_ = root + return self + + def transform(self, X): + return (X - self.mean_) @ self.coef_ + + +data = load_iris() +X = data.data + +dec = DecorrelateTransformer() +dec.fit(X) +pred = dec.transform(X[:5]) +print(pred) + + +############################################ +# Conversion into ONNX +# ++++++++++++++++++++ +# +# The shape calculator does not change. + +def decorrelate_transformer_shape_calculator(operator): + op = operator.raw_operator + input_type = operator.inputs[0].type.__class__ + input_dim = operator.inputs[0].type.shape[0] + output_type = input_type([input_dim, op.coef_.shape[1]]) + operator.outputs[0].type = output_type + + +################################### +# The converter is different. + + +def decorrelate_transformer_converter(scope, operator, container): + op = operator.raw_operator + out = operator.outputs + + # We retrieve the unique input. + X = operator.inputs[0] + + # In most case, computation happen in floats. + # But it might be with double. ONNX is very strict + # about types, every constant should have the same + # type as the input. + proto_dtype = guess_proto_type(X.type) + + mean_name = scope.get_unique_variable_name('mean') + container.add_initializer(mean_name, proto_dtype, + op.mean_.shape, list(op.mean_.ravel())) + + coef_name = scope.get_unique_variable_name('coef') + container.add_initializer(coef_name, proto_dtype, + op.coef_.shape, list(op.coef_.ravel())) + + op_name = scope.get_unique_operator_name('sub') + sub_name = scope.get_unique_variable_name('sub') + # This function is defined in package onnxconverter_common. + # Most common operators can be added to the graph with + # these functions. It handles the case when specifications + # changed accross opsets (a parameter becomes an input + # for example). + apply_sub(scope, [X.full_name, mean_name], sub_name, container, + operator_name=op_name) + + op_name = scope.get_unique_operator_name('matmul') + container.add_node( + 'MatMul', [sub_name, coef_name], + out[0].full_name, name=op_name) + + +########################################## +# We need to let *skl2onnx* know about the new converter. + +update_registered_converter( + DecorrelateTransformer, "SklearnDecorrelateTransformer", + decorrelate_transformer_shape_calculator, + decorrelate_transformer_converter) + + +onx = to_onnx(dec, X.astype(numpy.float32)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float32)) +got = sess.run(None, {'X': X.astype(numpy.float32)})[0] + + +def diff(p1, p2): + p1 = p1.ravel() + p2 = p2.ravel() + d = numpy.abs(p2 - p1) + return d.max(), (d / numpy.abs(p1)).max() + + +print(diff(exp, got)) + +##################################### +# Let's check it works as well with double. 
+ +onx = to_onnx(dec, X.astype(numpy.float64)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float64)) +got = sess.run(None, {'X': X.astype(numpy.float64)})[0] +print(diff(exp, got)) + +############################################# +# The differences are smaller with double as expected. diff --git a/docs/tutorial/plot_kcustom_converter_wrapper.py b/docs/tutorial/plot_kcustom_converter_wrapper.py new file mode 100644 index 000000000..cbab35775 --- /dev/null +++ b/docs/tutorial/plot_kcustom_converter_wrapper.py @@ -0,0 +1,203 @@ +""" +.. _l-plot-custom-converter-wrapper: + +Implement a new converter using other converters +================================================ + +.. index:: custom converter + +In many cases, a custom models leverages existing models +which already have an associated converter. To convert this +patchwork, existing converters must be called. This example +shows how to do that. Example :ref:`l-plot-custom-converter` +can be rewritten by using a `PCA `_. +We could then reuse the converter associated to this model. + +.. contents:: + :local: + +Custom model +++++++++++++ + +Let's implement a simple custom model using +:epkg:`scikit-learn` API. The model is preprocessing +which decorrelates correlated random variables. +If *X* is a matrix of features, :math:`V=\frac{1}{n}X'X` +is the covariance matrix. We compute :math:`X V^{1/2}`. +""" +from mlprodict.onnxrt import OnnxInference +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +import pickle +from io import BytesIO +import numpy +from numpy.testing import assert_almost_equal +from onnxruntime import InferenceSession +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.datasets import load_iris +from sklearn.decomposition import PCA +from skl2onnx import update_registered_converter +from skl2onnx.algebra.onnx_ops import OnnxIdentity +from skl2onnx.algebra.onnx_operator import OnnxSubOperator +from skl2onnx import to_onnx + + +class DecorrelateTransformer(TransformerMixin, BaseEstimator): + """ + Decorrelates correlated gaussian features. + + :param alpha: avoids non inversible matrices + by adding *alpha* identity matrix + + *Attributes* + + * `self.mean_`: average + * `self.coef_`: square root of the coveriance matrix + """ + + def __init__(self, alpha=0.): + BaseEstimator.__init__(self) + TransformerMixin.__init__(self) + self.alpha = alpha + + def fit(self, X, y=None, sample_weights=None): + self.pca_ = PCA(X.shape[1]) + self.pca_.fit(X) + return self + + def transform(self, X): + return self.pca_.transform(X) + + +def test_decorrelate_transformer(): + data = load_iris() + X = data.data + + dec = DecorrelateTransformer() + dec.fit(X) + pred = dec.transform(X) + cov = pred.T @ pred + for i in range(cov.shape[0]): + cov[i, i] = 1. + assert_almost_equal(numpy.identity(4), cov) + + st = BytesIO() + pickle.dump(dec, st) + dec2 = pickle.load(BytesIO(st.getvalue())) + assert_almost_equal(dec.transform(X), dec2.transform(X)) + + +test_decorrelate_transformer() + +data = load_iris() +X = data.data + +dec = DecorrelateTransformer() +dec.fit(X) +pred = dec.transform(X[:5]) +print(pred) + + +############################################ +# Conversion into ONNX +# ++++++++++++++++++++ +# +# Let's try to convert it and see what happens. + + +try: + to_onnx(dec, X.astype(numpy.float32)) +except Exception as e: + print(e) + +############################ +# This error means there is no converter associated +# to *DecorrelateTransformer*. Let's do it. 
+# It requires to implement the two following +# functions, a shape calculator and a converter +# with the same signature as below. +# First the shape calculator. We retrieve the input type +# add tells the output type has the same type, +# the same number of rows and a specific number of columns. + + +def decorrelate_transformer_shape_calculator(operator): + op = operator.raw_operator + input_type = operator.inputs[0].type.__class__ + input_dim = operator.inputs[0].type.shape[0] + output_type = input_type([input_dim, op.pca_.components_.shape[1]]) + operator.outputs[0].type = output_type + + +################################### +# The converter. One thing we need to pay attention to +# is the target opset. This information is important +# to make sure that every node is defined following the +# specifications of that opset. + + +def decorrelate_transformer_converter(scope, operator, container): + op = operator.raw_operator + opv = container.target_opset + out = operator.outputs + + # We retrieve the unique input. + X = operator.inputs[0] + + # We tell in ONNX language how to compute the unique output. + # op_version=opv tells which opset is requested + subop = OnnxSubOperator(op.pca_, X, op_version=opv) + Y = OnnxIdentity(subop, op_version=opv, output_names=out[:1]) + Y.add_to(scope, container) + + +########################################## +# We need to let *skl2onnx* know about the new converter. + + +update_registered_converter( + DecorrelateTransformer, "SklearnDecorrelateTransformer", + decorrelate_transformer_shape_calculator, + decorrelate_transformer_converter) + + +onx = to_onnx(dec, X.astype(numpy.float32)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float32)) +got = sess.run(None, {'X': X.astype(numpy.float32)})[0] + + +def diff(p1, p2): + p1 = p1.ravel() + p2 = p2.ravel() + d = numpy.abs(p2 - p1) + return d.max(), (d / numpy.abs(p1)).max() + + +print(diff(exp, got)) + +##################################### +# Let's check it works as well with double. + +onx = to_onnx(dec, X.astype(numpy.float64)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float64)) +got = sess.run(None, {'X': X.astype(numpy.float64)})[0] +print(diff(exp, got)) + +############################################# +# The differences are smaller with double as expected. + + +############################# +# Final graph +# +++++++++++ + +oinf = OnnxInference(onx) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) diff --git a/docs/tutorial/plot_lcustom_options.py b/docs/tutorial/plot_lcustom_options.py new file mode 100644 index 000000000..60e95dd00 --- /dev/null +++ b/docs/tutorial/plot_lcustom_options.py @@ -0,0 +1,204 @@ +""" +.. _l-plot-custom-options: + +A new converter with options +============================ + +.. index:: options + +Options are used to implement different conversion +for a same model. The options can be used to replace +an operator *MatMul* by the *Gemm* operator and compare the +processing time for both graph. Let's see how to retrieve +the options within a converter. + +Example :ref:`l-plot-custom-converter` implements a converter +which uses operator *MatMul*. Option *use_gemm* is used to +replace *MatMul* by *Gemm*. + +.. 
contents:: + :local: + +Custom model +++++++++++++ + +""" +from mlprodict.onnxrt import OnnxInference +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from pandas import DataFrame +from skl2onnx.tutorial import measure_time +import numpy +from onnxruntime import InferenceSession +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.datasets import load_iris +from skl2onnx import update_registered_converter +from skl2onnx.common.data_types import guess_numpy_type +from skl2onnx.algebra.onnx_ops import ( + OnnxSub, OnnxMatMul, OnnxGemm) +from skl2onnx import to_onnx + + +class DecorrelateTransformer(TransformerMixin, BaseEstimator): + """ + Decorrelates correlated gaussian features. + + :param alpha: avoids non inversible matrices + by adding *alpha* identity matrix + + *Attributes* + + * `self.mean_`: average + * `self.coef_`: square root of the coveriance matrix + """ + + def __init__(self, alpha=0.): + BaseEstimator.__init__(self) + TransformerMixin.__init__(self) + self.alpha = alpha + + def fit(self, X, y=None, sample_weights=None): + if sample_weights is not None: + raise NotImplementedError( + "sample_weights != None is not implemented.") + self.mean_ = numpy.mean(X, axis=0, keepdims=True) + X = X - self.mean_ + V = X.T @ X / X.shape[0] + if self.alpha != 0: + V += numpy.identity(V.shape[0]) * self.alpha + L, P = numpy.linalg.eig(V) + Linv = L ** (-0.5) + diag = numpy.diag(Linv) + root = P @ diag @ P.transpose() + self.coef_ = root + return self + + def transform(self, X): + return (X - self.mean_) @ self.coef_ + + +data = load_iris() +X = data.data + +dec = DecorrelateTransformer() +dec.fit(X) +pred = dec.transform(X[:5]) +print(pred) + + +############################################ +# Conversion into ONNX +# ++++++++++++++++++++ +# +# Let's try to convert it and see what happens. + + +def decorrelate_transformer_shape_calculator(operator): + op = operator.raw_operator + input_type = operator.inputs[0].type.__class__ + input_dim = operator.inputs[0].type.shape[0] + output_type = input_type([input_dim, op.coef_.shape[1]]) + operator.outputs[0].type = output_type + + +def decorrelate_transformer_converter(scope, operator, container): + op = operator.raw_operator + opv = container.target_opset + out = operator.outputs + + X = operator.inputs[0] + + dtype = guess_numpy_type(X.type) + options = container.get_options(op, dict(use_gemm=False)) + use_gemm = options['use_gemm'] + print('conversion: use_gemm=', use_gemm) + + if use_gemm: + Y = OnnxGemm(X, op.coef_.astype(dtype), + (- op.mean_ @ op.coef_).astype(dtype), + op_version=opv, alpha=1., beta=1., + output_names=out[:1]) + else: + Y = OnnxMatMul( + OnnxSub(X, op.mean_.astype(dtype), op_version=opv), + op.coef_.astype(dtype), + op_version=opv, output_names=out[:1]) + Y.add_to(scope, container) + + +################################### +# The registration needs to declare the options +# supported by the converted. 
+ + +update_registered_converter( + DecorrelateTransformer, "SklearnDecorrelateTransformer", + decorrelate_transformer_shape_calculator, + decorrelate_transformer_converter, + options={'use_gemm': [True, False]}) + + +onx = to_onnx(dec, X.astype(numpy.float32)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float32)) +got = sess.run(None, {'X': X.astype(numpy.float32)})[0] + + +def diff(p1, p2): + p1 = p1.ravel() + p2 = p2.ravel() + d = numpy.abs(p2 - p1) + return d.max(), (d / numpy.abs(p1)).max() + + +print(diff(exp, got)) + +############################################ +# We try the non default option, `use_gemm: True`. + +onx2 = to_onnx(dec, X.astype(numpy.float32), + options={'use_gemm': True}) + +sess2 = InferenceSession(onx2.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float32)) +got2 = sess2.run(None, {'X': X.astype(numpy.float32)})[0] + +print(diff(exp, got2)) + +############################## +# Visually. + + +oinf = OnnxInference(onx2) +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) + + +######################################### +# Time comparison +# +++++++++++++++ +# +# Let's compare the two computation. + + +X32 = X.astype(numpy.float32) +obs = [] + +context = {'sess': sess, 'X32': X32} +mt = measure_time( + "sess.run(None, {'X': X32})", context, div_by_number=True, + number=100, repeat=1000) +mt['use_gemm'] = False +obs.append(mt) + +context = {'sess2': sess2, 'X32': X32} +mt2 = measure_time( + "sess2.run(None, {'X': X32})", context, div_by_number=True, + number=10, repeat=100) +mt2['use_gemm'] = True +obs.append(mt2) + +DataFrame(obs).T diff --git a/docs/tutorial/plot_mcustom_parser.py b/docs/tutorial/plot_mcustom_parser.py new file mode 100644 index 000000000..c6668c08e --- /dev/null +++ b/docs/tutorial/plot_mcustom_parser.py @@ -0,0 +1,191 @@ +""" +Change the number of outputs by adding a parser +=============================================== + +.. index:: parser + +By default, :epkg:`sklearn-onnx` assumes that a classifier +has two outputs (label and probabilities), a regressor +has one output (prediction), a transform has one output +(the transformed data). What if it is not the case? +The following example creates a custom converter +and a custom parser which defines the number of outputs +expected by the converted model. + +Example :ref:`l-plot-custom-options` shows a converter +which selects two ways to compute the same outputs. +In this one, the converter produces both. That would not +be a very efficient converter but that's just for the sake +of using a parser. By default, a transformer only returns +one output but both are needed. + +.. contents:: + :local: + +A new transformer ++++++++++++++++++ +""" +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +from mlprodict.onnxrt import OnnxInference +import numpy +from onnxruntime import InferenceSession +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.datasets import load_iris +from skl2onnx import update_registered_converter +from skl2onnx.common.data_types import guess_numpy_type +from skl2onnx.algebra.onnx_ops import ( + OnnxSub, OnnxMatMul, OnnxGemm) +from skl2onnx import to_onnx, get_model_alias + + +class DecorrelateTransformer(TransformerMixin, BaseEstimator): + """ + Decorrelates correlated gaussian features. 
+ + :param alpha: avoids non inversible matrices + by adding *alpha* identity matrix + + *Attributes* + + * `self.mean_`: average + * `self.coef_`: square root of the coveriance matrix + """ + + def __init__(self, alpha=0.): + BaseEstimator.__init__(self) + TransformerMixin.__init__(self) + self.alpha = alpha + + def fit(self, X, y=None, sample_weights=None): + if sample_weights is not None: + raise NotImplementedError( + "sample_weights != None is not implemented.") + self.mean_ = numpy.mean(X, axis=0, keepdims=True) + X = X - self.mean_ + V = X.T @ X / X.shape[0] + if self.alpha != 0: + V += numpy.identity(V.shape[0]) * self.alpha + L, P = numpy.linalg.eig(V) + Linv = L ** (-0.5) + diag = numpy.diag(Linv) + root = P @ diag @ P.transpose() + self.coef_ = root + return self + + def transform(self, X): + return (X - self.mean_) @ self.coef_ + + +data = load_iris() +X = data.data + +dec = DecorrelateTransformer() +dec.fit(X) +pred = dec.transform(X[:5]) +print(pred) + + +############################################ +# Conversion into ONNX with two outputs +# +++++++++++++++++++++++++++++++++++++ +# +# Let's try to convert it and see what happens. + + +def decorrelate_transformer_shape_calculator(operator): + op = operator.raw_operator + input_type = operator.inputs[0].type.__class__ + input_dim = operator.inputs[0].type.shape[0] + output_type = input_type([input_dim, op.coef_.shape[1]]) + operator.outputs[0].type = output_type + + +def decorrelate_transformer_converter(scope, operator, container): + op = operator.raw_operator + opv = container.target_opset + out = operator.outputs + + X = operator.inputs[0] + + dtype = guess_numpy_type(X.type) + + Y1 = OnnxMatMul( + OnnxSub(X, op.mean_.astype(dtype), op_version=opv), + op.coef_.astype(dtype), + op_version=opv, output_names=out[:1]) + + Y2 = OnnxGemm(X, op.coef_.astype(dtype), + (- op.mean_ @ op.coef_).astype(dtype), + op_version=opv, alpha=1., beta=1., + output_names=out[1:2]) + + Y1.add_to(scope, container) + Y2.add_to(scope, container) + + +def decorrelate_transformer_parser( + scope, model, inputs, custom_parsers=None): + alias = get_model_alias(type(model)) + this_operator = scope.declare_local_operator(alias, model) + + # inputs + this_operator.inputs.append(inputs[0]) + + # outputs + cls_type = inputs[0].type.__class__ + val_y1 = scope.declare_local_variable('nogemm', cls_type()) + val_y2 = scope.declare_local_variable('gemm', cls_type()) + this_operator.outputs.append(val_y1) + this_operator.outputs.append(val_y2) + + # ends + return this_operator.outputs + +################################### +# The registration needs to declare the parser as well. + + +update_registered_converter( + DecorrelateTransformer, "SklearnDecorrelateTransformer", + decorrelate_transformer_shape_calculator, + decorrelate_transformer_converter, + parser=decorrelate_transformer_parser) + + +############################################# +# And conversion. + +onx = to_onnx(dec, X.astype(numpy.float32)) + +sess = InferenceSession(onx.SerializeToString()) + +exp = dec.transform(X.astype(numpy.float32)) +results = sess.run(None, {'X': X.astype(numpy.float32)}) +y1 = results[0] +y2 = results[1] + + +def diff(p1, p2): + p1 = p1.ravel() + p2 = p2.ravel() + d = numpy.abs(p2 - p1) + return d.max(), (d / numpy.abs(p1)).max() + + +print(diff(exp, y1)) +print(diff(exp, y2)) + + +################################ +# It works. The final looks like the following. 
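+
+################################
+# As a quick check (an aside, not part of the original example), the
+# two outputs declared by the parser also appear in the converted
+# model:
+
+print([o.name for o in onx.graph.output])
+
+################################
+# Back to the compiled runtime announced above.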
+ +oinf = OnnxInference(onx, runtime="python_compiled") +print(oinf) + +############################# +# Final graph +# +++++++++++ + +ax = plot_graphviz(oinf.to_dot()) +ax.get_xaxis().set_visible(False) +ax.get_yaxis().set_visible(False) diff --git a/docs/tutorial/plot_pextend_python_runtime.py b/docs/tutorial/plot_pextend_python_runtime.py new file mode 100644 index 000000000..7bfd05270 --- /dev/null +++ b/docs/tutorial/plot_pextend_python_runtime.py @@ -0,0 +1,401 @@ +""" + +.. _l-extend-python-runtime: + +Fast design with a python runtime +================================= + +.. index:: custom python runtime + +:epkg:`ONNX operators` do not contain all operators +from :epkg:`numpy`. There is no operator for +`solve `_ but this one +is needed to implement the prediction function +of model :epkg:`NMF`. The converter can be written +including a new ONNX operator but then it requires a +runtime for it to be tested. This example shows how +to do that with the python runtime implemented in +:epkg:`mlprodict`. It may not be :epkg:`onnxruntime` +but that speeds up the implementation of the converter. + +The example changes the transformer from +:ref:`l-plot-custom-converter`, the method *predict* +decorrelates the variables by computing the eigen +values. Method *fit* does not do anything anymore. + +.. contents:: + :local: + +A transformer which decorrelates variables +++++++++++++++++++++++++++++++++++++++++++ + +This time, the eigen values are not estimated at +training time but at prediction time. +""" +from mlprodict.onnxrt.shape_object import ShapeObject +from mlprodict.onnxrt.ops_cpu import OpRunCustom, register_operator +from skl2onnx.algebra.onnx_ops import ( + OnnxAdd, + OnnxCast, + OnnxDiv, + OnnxGatherElements, + OnnxEyeLike, + OnnxMatMul, + OnnxMul, + OnnxPow, + OnnxReduceMean, + OnnxShape, + OnnxSub, + OnnxTranspose, +) +from skl2onnx.algebra import OnnxOperator +from mlprodict.onnxrt import OnnxInference +from pyquickhelper.helpgen.graphviz_helper import plot_graphviz +import pickle +from io import BytesIO +import numpy +from numpy.testing import assert_almost_equal +from sklearn.base import TransformerMixin, BaseEstimator +from sklearn.datasets import load_iris +from skl2onnx.common.data_types import guess_numpy_type, guess_proto_type +from skl2onnx import to_onnx +from skl2onnx import update_registered_converter + + +class LiveDecorrelateTransformer(TransformerMixin, BaseEstimator): + """ + Decorrelates correlated gaussian features. 
+ + :param alpha: avoids non inversible matrices + by adding *alpha* identity matrix + + *Attributes* + + * `self.nf_`: number of expected features + """ + + def __init__(self, alpha=0.): + BaseEstimator.__init__(self) + TransformerMixin.__init__(self) + self.alpha = alpha + + def fit(self, X, y=None, sample_weights=None): + if sample_weights is not None: + raise NotImplementedError( + "sample_weights != None is not implemented.") + self.nf_ = X.shape[1] + return self + + def transform(self, X): + mean_ = numpy.mean(X, axis=0, keepdims=True) + X2 = X - mean_ + V = X2.T @ X2 / X2.shape[0] + if self.alpha != 0: + V += numpy.identity(V.shape[0]) * self.alpha + L, P = numpy.linalg.eig(V) + Linv = L ** (-0.5) + diag = numpy.diag(Linv) + root = P @ diag @ P.transpose() + coef_ = root + return (X - mean_) @ coef_ + + +def test_live_decorrelate_transformer(): + data = load_iris() + X = data.data + + dec = LiveDecorrelateTransformer() + dec.fit(X) + pred = dec.transform(X) + cov = pred.T @ pred + cov /= cov[0, 0] + assert_almost_equal(numpy.identity(4), cov) + + dec = LiveDecorrelateTransformer(alpha=1e-10) + dec.fit(X) + pred = dec.transform(X) + cov = pred.T @ pred + cov /= cov[0, 0] + assert_almost_equal(numpy.identity(4), cov) + + st = BytesIO() + pickle.dump(dec, st) + dec2 = pickle.load(BytesIO(st.getvalue())) + assert_almost_equal(dec.transform(X), dec2.transform(X)) + + +test_live_decorrelate_transformer() + +########################################### +# Everything works as expected. +# +# Extend ONNX +# +++++++++++ +# +# The conversion requires one operator to compute +# the eigen values and vectors. The list of +# :epkg:`ONNX operators` does not contain anything +# which produces eigen values. It does not seem +# efficient to implement an algorithm with existing +# ONNX operators to find eigen values. +# A new operator must be +# added, we give it the same name *Eig* as in :epkg:`numpy`. +# It would take a matrix and would produce one or two outputs, +# the eigen values and the eigen vectors. +# Just for the exercise, a parameter specifies +# to output the eigen vectors as a second output. +# +# New ONNX operator +# ^^^^^^^^^^^^^^^^^ +# +# Any unknown operator can be +# added to an ONNX graph. Operators are grouped by domain, +# `''` or `ai.onnx` refers to matrix computation. +# `ai.onnx.ml` refers to usual machine learning models. +# New domains are officially supported by :epkg:`onnx` package. +# We want to create a new operator `Eig` of domain `onnxcustom`. +# It must be declared in a class, then a converter can use it. + + +class OnnxEig(OnnxOperator): + """ + Defines a custom operator not defined by ONNX + specifications but in onnxruntime. 
+    """
+
+    since_version = 1  # last changed in this version
+    expected_inputs = ['X']  # input names
+    expected_outputs = ['EigenValues', 'EigenVectors']  # output names
+    input_range = [1, 1]  # only one input is allowed
+    output_range = [1, 2]  # 1 or 2 outputs are produced
+    is_deprecated = False  # obviously not deprecated
+    domain = 'onnxcustom'  # domain, anything is ok
+    operator_name = 'Eig'  # operator name
+    past_version = {}  # empty as it is the first version
+
+    def __init__(self, X, eigv=False, op_version=None, **kwargs):
+        """
+        :param X: array or OnnxOperatorMixin
+        :param eigv: also produces the eigen vectors
+        :param op_version: opset version
+        :param kwargs: additional parameters
+        """
+        OnnxOperator.__init__(
+            self, X, eigv=eigv, op_version=op_version, **kwargs)
+
+
+print(OnnxEig('X', eigv=True))
+
+##################################
+# Now we can write the converter and
+# the shape calculator.
+#
+# shape calculator
+# ^^^^^^^^^^^^^^^^
+#
+# Nothing new here.
+
+
+def live_decorrelate_transformer_shape_calculator(operator):
+    op = operator.raw_operator
+    input_type = operator.inputs[0].type.__class__
+    input_dim = operator.inputs[0].type.shape[0]
+    output_type = input_type([input_dim, op.nf_])
+    operator.outputs[0].type = output_type
+
+
+##################################
+# converter
+# ^^^^^^^^^
+#
+# The converter uses the class `OnnxEig`. The code
+# is longer than in previous converters as the computation
+# is more complex too.
+
+
+def live_decorrelate_transformer_converter(scope, operator, container):
+    # shortcuts
+    op = operator.raw_operator
+    opv = container.target_opset
+    out = operator.outputs
+
+    # We retrieve the unique input.
+    X = operator.inputs[0]
+
+    # We guess its type. If the operator ingests float (or double),
+    # it outputs float (or double).
+    proto_dtype = guess_proto_type(X.type)
+    dtype = guess_numpy_type(X.type)
+
+    # The commented lines specify the numpy computation
+    # the ONNX code implements.
+    # mean_ = numpy.mean(X, axis=0, keepdims=True)
+    mean = OnnxReduceMean(X, axes=[0], keepdims=1, op_version=opv)
+
+    # This is a trick I often use. The converter automatically
+    # chooses a name for every output. In a big graph,
+    # it is difficult to know which operator is producing which output.
+    # This line just tells every node to prefix its outputs with this string.
+    # It also applies to all input nodes unless this method
+    # was called for one of these nodes.
+    mean.set_onnx_name_prefix('mean')
+
+    # X2 = X - mean_
+    X2 = OnnxSub(X, mean, op_version=opv)
+
+    # V = X2.T @ X2 / X2.shape[0]
+    N = OnnxGatherElements(
+        OnnxShape(X, op_version=opv),
+        numpy.array([0], dtype=numpy.int64),
+        op_version=opv)
+    Nf = OnnxCast(N, to=proto_dtype, op_version=opv)
+
+    # Every output involved in N and Nf is prefixed by 'N'.
+ Nf.set_onnx_name_prefix('N') + + V = OnnxDiv( + OnnxMatMul(OnnxTranspose(X2, op_version=opv), + X2, op_version=opv), + Nf, op_version=opv) + V.set_onnx_name_prefix('V1') + + # V += numpy.identity(V.shape[0]) * self.alpha + V = OnnxAdd(V, + op.alpha * numpy.identity(op.nf_, dtype=dtype), + op_version=opv) + V.set_onnx_name_prefix('V2') + + # L, P = numpy.linalg.eig(V) + LP = OnnxEig(V, eigv=True, op_version=opv) + LP.set_onnx_name_prefix('LP') + + # Linv = L ** (-0.5) + # Notation LP[0] means OnnxPow is taking the first output + # of operator OnnxEig, LP[1] would mean the second one + # LP is not allowed as it is ambiguous + Linv = OnnxPow(LP[0], numpy.array([-0.5], dtype=dtype), + op_version=opv) + Linv.set_onnx_name_prefix('Linv') + + # diag = numpy.diag(Linv) + diag = OnnxMul( + OnnxEyeLike( + numpy.array([op.nf_, op.nf_], dtype=numpy.int64), + k=0, op_version=opv), + Linv, op_version=opv) + diag.set_onnx_name_prefix('diag') + + # root = P @ diag @ P.transpose() + trv = OnnxTranspose(LP[1], op_version=opv) + coef_left = OnnxMatMul(LP[1], diag, op_version=opv) + coef_left.set_onnx_name_prefix('coef_left') + coef = OnnxMatMul(coef_left, trv, op_version=opv) + coef.set_onnx_name_prefix('coef') + + # Same part as before. + Y = OnnxMatMul(X2, coef, op_version=opv, output_names=out[:1]) + Y.set_onnx_name_prefix('Y') + + # The last line specifies the final output. + # Every node involved in the computation is added to the ONNX + # graph at this stage. + Y.add_to(scope, container) + + +################################### +# Runtime for Eig +# ^^^^^^^^^^^^^^^ +# +# Here comes the new part. The python runtime does not +# implement any runtime for *Eig*. We need to tell the runtime +# to compute eigen values and vectors every time operator *Eig* +# is called. That means implementing two methods, +# one to compute, one to infer the shape of the results. +# The first one is mandatory, the second one can return an +# empty shape if it depends on the inputs. If it is known, +# the runtime may be able to optimize the computation, +# by reducing allocation for example. 
+
+class OpEig(OpRunCustom):
+
+    op_name = 'Eig'  # operator name
+    atts = {'eigv': True}  # operator parameters
+
+    def __init__(self, onnx_node, desc=None, **options):
+        # constructor, every parameter is added as a member
+        OpRunCustom.__init__(self, onnx_node, desc=desc,
+                             expected_attributes=OpEig.atts,
+                             **options)
+
+    def run(self, x):
+        # computation
+        if self.eigv:
+            return numpy.linalg.eig(x)
+        return (numpy.linalg.eigvals(x), )
+
+    def infer_shapes(self, x):
+        # shape inference, if you don't know what to
+        # write, just return `ShapeObject(None)`
+        if self.eigv:
+            return (
+                ShapeObject(
+                    x.shape, dtype=x.dtype,
+                    name=self.__class__.__name__ + 'Values'),
+                ShapeObject(
+                    x.shape, dtype=x.dtype,
+                    name=self.__class__.__name__ + 'Vectors'))
+        return (ShapeObject(x.shape, dtype=x.dtype,
+                            name=self.__class__.__name__), )
+
+########################################
+# Registration
+# ^^^^^^^^^^^^
+
+
+update_registered_converter(
+    LiveDecorrelateTransformer, "SklearnLiveDecorrelateTransformer",
+    live_decorrelate_transformer_shape_calculator,
+    live_decorrelate_transformer_converter)
+
+#######################################
+# Final example
+# +++++++++++++
+
+
+data = load_iris()
+X = data.data
+
+dec = LiveDecorrelateTransformer()
+dec.fit(X)
+
+onx = to_onnx(dec, X.astype(numpy.float32))
+
+register_operator(OpEig, name='Eig', overwrite=False)
+
+oinf = OnnxInference(onx)
+
+exp = dec.transform(X.astype(numpy.float32))
+got = oinf.run({'X': X.astype(numpy.float32)})['variable']
+
+
+def diff(p1, p2):
+    p1 = p1.ravel()
+    p2 = p2.ravel()
+    d = numpy.abs(p2 - p1)
+    return d.max(), (d / numpy.abs(p1)).max()
+
+
+print(diff(exp, got))
+
+#############################################
+# It works!
+
+#############################
+# Final graph
+# +++++++++++
+
+oinf = OnnxInference(onx)
+ax = plot_graphviz(oinf.to_dot())
+ax.get_xaxis().set_visible(False)
+ax.get_yaxis().set_visible(False)
diff --git a/docs/tutorial/plot_qextend_onnxruntime.py b/docs/tutorial/plot_qextend_onnxruntime.py
new file mode 100644
index 000000000..fc7e4d168
--- /dev/null
+++ b/docs/tutorial/plot_qextend_onnxruntime.py
@@ -0,0 +1,19 @@
+"""
+Fast runtime with onnxruntime
+=============================
+
+:epkg:`ONNX operators` do not contain every operator
+from :epkg:`numpy`. There is no operator for
+`solve `_ but this one
+is needed to implement the prediction function
+of model :epkg:`NMF`. The converter can be written
+including a new ONNX operator but then it requires a
+runtime for it to be tested. Example
+:ref:`l-extend-python-runtime` shows how to do that
+with :epkg:`mlprodict`. Doing the same with
+:epkg:`onnxruntime` is more ambitious as it requires
+C++...
+
+*to be continued*
+"""
diff --git a/docs/tutorial_1-5_external.rst b/docs/tutorial_1-5_external.rst
new file mode 100644
index 000000000..ac28170b5
--- /dev/null
+++ b/docs/tutorial_1-5_external.rst
@@ -0,0 +1,15 @@
+Using converter from other libraries
+====================================
+
+Before starting to write our own converter,
+we can use some converters available in other libraries
+than :epkg:`sklearn-onnx`. :epkg:`onnxmltools` implements
+converters for :epkg:`xgboost` and :epkg:`LightGBM`.
+The following examples show how to use the converter when the
+model is part of a pipeline.
+
+.. 
toctree::
+    :maxdepth: 1
+
+    auto_tutorial/plot_gexternal_lightgbm
+    auto_tutorial/plot_gexternal_xgboost
diff --git a/docs/tutorial_1_simple.rst b/docs/tutorial_1_simple.rst
new file mode 100644
index 000000000..740df6626
--- /dev/null
+++ b/docs/tutorial_1_simple.rst
@@ -0,0 +1,25 @@
+
+The easy case
+=============
+
+The easy case is when the machine learned model
+can be converted into ONNX with a converting library
+without writing any specific code. That means that a converter
+exists for the model or for each piece of the model,
+the converter produces an ONNX graph where every node
+is part of the existing ONNX specifications, and the runtime
+used to compute the predictions implements every node
+used in the ONNX graph.
+
+.. toctree::
+    :maxdepth: 1
+
+    auto_tutorial/plot_abegin_convert_pipeline
+    auto_tutorial/plot_bbegin_measure_time
+    auto_tutorial/plot_cbegin_opset
+    auto_tutorial/plot_dbegin_options
+    auto_tutorial/plot_dbegin_options_list
+    auto_tutorial/plot_ebegin_float_double
+    auto_tutorial/plot_fbegin_investigate
+    auto_tutorial/plot_gbegin_dataframe
+    auto_tutorial/plot_gbegin_transfer_learning
diff --git a/docs/tutorial_2_new_converter.rst b/docs/tutorial_2_new_converter.rst
new file mode 100644
index 000000000..61a3e3dd9
--- /dev/null
+++ b/docs/tutorial_2_new_converter.rst
@@ -0,0 +1,38 @@
+A custom converter for a custom model
+=====================================
+
+When :epkg:`sklearn-onnx` converts a :epkg:`scikit-learn`
+pipeline, it looks into every transformer and predictor
+and fetches the associated converter. The resulting
+ONNX graph combines the outcome of every converter
+in a single graph. If a model does not have its own converter,
+the library displays an error message telling that a converter is missing.
+
+.. runpython::
+    :showcode:
+
+    import numpy
+    from sklearn.linear_model import LogisticRegression
+    from skl2onnx import to_onnx
+
+
+    class MyLogisticRegression(LogisticRegression):
+        pass
+
+
+    X = numpy.array([[0, 0.1]])
+    try:
+        to_onnx(MyLogisticRegression(), X)
+    except Exception as e:
+        print(e)
+
+The following section shows how to create a custom converter.
+
+.. toctree::
+    :maxdepth: 1
+
+    auto_tutorial/plot_icustom_converter
+    auto_tutorial/plot_jcustom_syntax
+    auto_tutorial/plot_kcustom_converter_wrapper
+    auto_tutorial/plot_lcustom_options
+    auto_tutorial/plot_mcustom_parser
diff --git a/docs/tutorial_3_new_operator.rst b/docs/tutorial_3_new_operator.rst
new file mode 100644
index 000000000..6681c2959
--- /dev/null
+++ b/docs/tutorial_3_new_operator.rst
@@ -0,0 +1,23 @@
+
+Extend ONNX, extend runtime
+===========================
+
+Existing converters assume it is possible to convert
+a model with the current list of :epkg:`ONNX operators`.
+This list grows with every release, but it may happen that
+a new node is needed. It could be added to the ONNX specifications,
+which requires a new release, but that's not mandatory.
+New nodes can easily be created by using a different domain.
+A domain defines a set of operators; there are currently two
+officially supported domains: :epkg:`ONNX operators` and
+:epkg:`ONNX ML operators`. Custom domains can be used.
+Once this new node is defined, a converter can use it.
+That leaves the last issue: the runtime must be aware
+of the implementation attached to this new node.
+That's the difficult part.
+
+.. 
toctree::
+    :maxdepth: 1
+
+    auto_tutorial/plot_pextend_python_runtime
+    auto_tutorial/plot_qextend_onnxruntime
\ No newline at end of file
diff --git a/skl2onnx/tutorial/__init__.py b/skl2onnx/tutorial/__init__.py
new file mode 100644
index 000000000..9baebadf9
--- /dev/null
+++ b/skl2onnx/tutorial/__init__.py
@@ -0,0 +1,5 @@
+"""
+Shortcuts to *tutorial*.
+"""
+
+from .benchmark import measure_time  # noqa
diff --git a/skl2onnx/tutorial/benchmark.py b/skl2onnx/tutorial/benchmark.py
new file mode 100644
index 000000000..bcab7e443
--- /dev/null
+++ b/skl2onnx/tutorial/benchmark.py
@@ -0,0 +1,43 @@
+"""
+Tools to help benchmarking.
+"""
+from timeit import Timer
+import numpy
+
+
+def measure_time(stmt, context, repeat=10, number=50, div_by_number=False):
+    """
+    Measures a statement and returns the results as a dictionary.
+
+    :param stmt: string, the statement to measure
+    :param context: dictionary of the variables the statement needs
+    :param repeat: average over *repeat* experiments
+    :param number: number of executions in one run
+    :param div_by_number: divide by the number of executions
+    :return: dictionary
+
+    .. runpython::
+        :showcode:
+
+        from skl2onnx.tutorial import measure_time
+        from math import cos
+
+        res = measure_time("cos(x)", context=dict(cos=cos, x=5.))
+        print(res)
+
+    See `Timer.repeat `_
+    for a better understanding of parameters *repeat* and *number*.
+    The function returns a duration corresponding to
+    *number* times the execution of the main statement.
+    """
+    tim = Timer(stmt, globals=context)
+    res = numpy.array(tim.repeat(repeat=repeat, number=number))
+    if div_by_number:
+        res /= number
+    mean = numpy.mean(res)
+    dev = numpy.mean(res ** 2)
+    dev = (dev - mean**2) ** 0.5
+    mes = dict(average=mean, deviation=dev, min_exec=numpy.min(res),
+               max_exec=numpy.max(res), repeat=repeat, number=number)
+    return mes
diff --git a/skl2onnx/tutorial/imagenet_classes.py b/skl2onnx/tutorial/imagenet_classes.py
new file mode 100644
index 000000000..a17e7c22a
--- /dev/null
+++ b/skl2onnx/tutorial/imagenet_classes.py
@@ -0,0 +1,1031 @@
+"""
+Information related to the :epkg:`ImageNet` competition.
+""" + +class_names = { + 0: 'tench, Tinca tinca', + 1: 'goldfish, Carassius auratus', + 2: 'great white shark, white shark, man-eater, man-eating shark, ' + 'Carcharodon carcharias', + 3: 'tiger shark, Galeocerdo cuvieri', + 4: 'hammerhead, hammerhead shark', + 5: 'electric ray, crampfish, numbfish, torpedo', + 6: 'stingray', + 7: 'cock', + 8: 'hen', + 9: 'ostrich, Struthio camelus', + 10: 'brambling, Fringilla montifringilla', + 11: 'goldfinch, Carduelis carduelis', + 12: 'house finch, linnet, Carpodacus mexicanus', + 13: 'junco, snowbird', + 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea', + 15: 'robin, American robin, Turdus migratorius', + 16: 'bulbul', + 17: 'jay', + 18: 'magpie', + 19: 'chickadee', + 20: 'water ouzel, dipper', + 21: 'kite', + 22: 'bald eagle, American eagle, Haliaeetus leucocephalus', + 23: 'vulture', + 24: 'great grey owl, great gray owl, Strix nebulosa', + 25: 'European fire salamander, Salamandra salamandra', + 26: 'common newt, Triturus vulgaris', + 27: 'eft', + 28: 'spotted salamander, Ambystoma maculatum', + 29: 'axolotl, mud puppy, Ambystoma mexicanum', + 30: 'bullfrog, Rana catesbeiana', + 31: 'tree frog, tree-frog', + 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui', + 33: 'loggerhead, loggerhead turtle, Caretta caretta', + 34: 'leatherback turtle, leatherback, leathery turtle, ' + 'Dermochelys coriacea', + 35: 'mud turtle', + 36: 'terrapin', + 37: 'box turtle, box tortoise', + 38: 'banded gecko', + 39: 'common iguana, iguana, Iguana iguana', + 40: 'American chameleon, anole, Anolis carolinensis', + 41: 'whiptail, whiptail lizard', + 42: 'agama', + 43: 'frilled lizard, Chlamydosaurus kingi', + 44: 'alligator lizard', + 45: 'Gila monster, Heloderma suspectum', + 46: 'green lizard, Lacerta viridis', + 47: 'African chameleon, Chamaeleo chamaeleon', + 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, ' + 'Varanus komodoensis', + 49: 'African crocodile, Nile crocodile, Crocodylus niloticus', + 50: 'American alligator, Alligator mississipiensis', + 51: 'triceratops', + 52: 'thunder snake, worm snake, Carphophis amoenus', + 53: 'ringneck snake, ring-necked snake, ring snake', + 54: 'hognose snake, puff adder, sand viper', + 55: 'green snake, grass snake', + 56: 'king snake, kingsnake', + 57: 'garter snake, grass snake', + 58: 'water snake', + 59: 'vine snake', + 60: 'night snake, Hypsiglena torquata', + 61: 'boa constrictor, Constrictor constrictor', + 62: 'rock python, rock snake, Python sebae', + 63: 'Indian cobra, Naja naja', + 64: 'green mamba', + 65: 'sea snake', + 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus', + 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus', + 68: 'sidewinder, horned rattlesnake, Crotalus cerastes', + 69: 'trilobite', + 70: 'harvestman, daddy longlegs, Phalangium opilio', + 71: 'scorpion', + 72: 'black and gold garden spider, Argiope aurantia', + 73: 'barn spider, Araneus cavaticus', + 74: 'garden spider, Aranea diademata', + 75: 'black widow, Latrodectus mactans', + 76: 'tarantula', + 77: 'wolf spider, hunting spider', + 78: 'tick', + 79: 'centipede', + 80: 'black grouse', + 81: 'ptarmigan', + 82: 'ruffed grouse, partridge, Bonasa umbellus', + 83: 'prairie chicken, prairie grouse, prairie fowl', + 84: 'peacock', + 85: 'quail', + 86: 'partridge', + 87: 'African grey, African gray, Psittacus erithacus', + 88: 'macaw', + 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita', + 90: 'lorikeet', + 91: 'coucal', + 92: 'bee eater', + 93: 
'hornbill', + 94: 'hummingbird', + 95: 'jacamar', + 96: 'toucan', + 97: 'drake', + 98: 'red-breasted merganser, Mergus serrator', + 99: 'goose', + 100: 'black swan, Cygnus atratus', + 101: 'tusker', + 102: 'echidna, spiny anteater, anteater', + 103: 'platypus, duckbill, duckbilled platypus, duck-billed ' + 'platypus, Ornithorhynchus anatinus', + 104: 'wallaby, brush kangaroo', + 105: 'koala, koala bear, kangaroo bear, native bear, ' + 'Phascolarctos cinereus', + 106: 'wombat', + 107: 'jellyfish', + 108: 'sea anemone, anemone', + 109: 'brain coral', + 110: 'flatworm, platyhelminth', + 111: 'nematode, nematode worm, roundworm', + 112: 'conch', + 113: 'snail', + 114: 'slug', + 115: 'sea slug, nudibranch', + 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore', + 117: 'chambered nautilus, pearly nautilus, nautilus', + 118: 'Dungeness crab, Cancer magister', + 119: 'rock crab, Cancer irroratus', + 120: 'fiddler crab', + 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king ' + 'crab, Paralithodes camtschatica', + 122: 'American lobster, Northern lobster, Maine lobster, ' + 'Homarus americanus', + 123: 'spiny lobster, langouste, rock lobster, crawfish, ' + 'crayfish, sea crawfish', + 124: 'crayfish, crawfish, crawdad, crawdaddy', + 125: 'hermit crab', + 126: 'isopod', + 127: 'white stork, Ciconia ciconia', + 128: 'black stork, Ciconia nigra', + 129: 'spoonbill', + 130: 'flamingo', + 131: 'little blue heron, Egretta caerulea', + 132: 'American egret, great white heron, Egretta albus', + 133: 'bittern', + 134: 'crane', + 135: 'limpkin, Aramus pictus', + 136: 'European gallinule, Porphyrio porphyrio', + 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana', + 138: 'bustard', + 139: 'ruddy turnstone, Arenaria interpres', + 140: 'red-backed sandpiper, dunlin, Erolia alpina', + 141: 'redshank, Tringa totanus', + 142: 'dowitcher', + 143: 'oystercatcher, oyster catcher', + 144: 'pelican', + 145: 'king penguin, Aptenodytes patagonica', + 146: 'albatross, mollymawk', + 147: 'grey whale, gray whale, devilfish, Eschrichtius ' + 'gibbosus, Eschrichtius robustus', + 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca', + 149: 'dugong, Dugong dugon', + 150: 'sea lion', + 151: 'Chihuahua', + 152: 'Japanese spaniel', + 153: 'Maltese dog, Maltese terrier, Maltese', + 154: 'Pekinese, Pekingese, Peke', + 155: 'Shih-Tzu', + 156: 'Blenheim spaniel', + 157: 'papillon', + 158: 'toy terrier', + 159: 'Rhodesian ridgeback', + 160: 'Afghan hound, Afghan', + 161: 'basset, basset hound', + 162: 'beagle', + 163: 'bloodhound, sleuthhound', + 164: 'bluetick', + 165: 'black-and-tan coonhound', + 166: 'Walker hound, Walker foxhound', + 167: 'English foxhound', + 168: 'redbone', + 169: 'borzoi, Russian wolfhound', + 170: 'Irish wolfhound', + 171: 'Italian greyhound', + 172: 'whippet', + 173: 'Ibizan hound, Ibizan Podenco', + 174: 'Norwegian elkhound, elkhound', + 175: 'otterhound, otter hound', + 176: 'Saluki, gazelle hound', + 177: 'Scottish deerhound, deerhound', + 178: 'Weimaraner', + 179: 'Staffordshire bullterrier, Staffordshire bull terrier', + 180: 'American Staffordshire terrier, Staffordshire terrier, ' + 'American pit bull terrier, pit bull terrier', + 181: 'Bedlington terrier', + 182: 'Border terrier', + 183: 'Kerry blue terrier', + 184: 'Irish terrier', + 185: 'Norfolk terrier', + 186: 'Norwich terrier', + 187: 'Yorkshire terrier', + 188: 'wire-haired fox terrier', + 189: 'Lakeland terrier', + 190: 'Sealyham terrier, Sealyham', + 191: 'Airedale, Airedale terrier', + 192: 
'cairn, cairn terrier', + 193: 'Australian terrier', + 194: 'Dandie Dinmont, Dandie Dinmont terrier', + 195: 'Boston bull, Boston terrier', + 196: 'miniature schnauzer', + 197: 'giant schnauzer', + 198: 'standard schnauzer', + 199: 'Scotch terrier, Scottish terrier, Scottie', + 200: 'Tibetan terrier, chrysanthemum dog', + 201: 'silky terrier, Sydney silky', + 202: 'soft-coated wheaten terrier', + 203: 'West Highland white terrier', + 204: 'Lhasa, Lhasa apso', + 205: 'flat-coated retriever', + 206: 'curly-coated retriever', + 207: 'golden retriever', + 208: 'Labrador retriever', + 209: 'Chesapeake Bay retriever', + 210: 'German short-haired pointer', + 211: 'vizsla, Hungarian pointer', + 212: 'English setter', + 213: 'Irish setter, red setter', + 214: 'Gordon setter', + 215: 'Brittany spaniel', + 216: 'clumber, clumber spaniel', + 217: 'English springer, English springer spaniel', + 218: 'Welsh springer spaniel', + 219: 'cocker spaniel, English cocker spaniel, cocker', + 220: 'Sussex spaniel', + 221: 'Irish water spaniel', + 222: 'kuvasz', + 223: 'schipperke', + 224: 'groenendael', + 225: 'malinois', + 226: 'briard', + 227: 'kelpie', + 228: 'komondor', + 229: 'Old English sheepdog, bobtail', + 230: 'Shetland sheepdog, Shetland sheep dog, Shetland', + 231: 'collie', + 232: 'Border collie', + 233: 'Bouvier des Flandres, Bouviers des Flandres', + 234: 'Rottweiler', + 235: 'German shepherd, German shepherd dog, German police dog, alsatian', + 236: 'Doberman, Doberman pinscher', + 237: 'miniature pinscher', + 238: 'Greater Swiss Mountain dog', + 239: 'Bernese mountain dog', + 240: 'Appenzeller', + 241: 'EntleBucher', + 242: 'boxer', + 243: 'bull mastiff', + 244: 'Tibetan mastiff', + 245: 'French bulldog', + 246: 'Great Dane', + 247: 'Saint Bernard, St Bernard', + 248: 'Eskimo dog, husky', + 249: 'malamute, malemute, Alaskan malamute', + 250: 'Siberian husky', + 251: 'dalmatian, coach dog, carriage dog', + 252: 'affenpinscher, monkey pinscher, monkey dog', + 253: 'basenji', + 254: 'pug, pug-dog', + 255: 'Leonberg', + 256: 'Newfoundland, Newfoundland dog', + 257: 'Great Pyrenees', + 258: 'Samoyed, Samoyede', + 259: 'Pomeranian', + 260: 'chow, chow chow', + 261: 'keeshond', + 262: 'Brabancon griffon', + 263: 'Pembroke, Pembroke Welsh corgi', + 264: 'Cardigan, Cardigan Welsh corgi', + 265: 'toy poodle', + 266: 'miniature poodle', + 267: 'standard poodle', + 268: 'Mexican hairless', + 269: 'timber wolf, grey wolf, gray wolf, Canis lupus', + 270: 'white wolf, Arctic wolf, Canis lupus tundrarum', + 271: 'red wolf, maned wolf, Canis rufus, Canis niger', + 272: 'coyote, prairie wolf, brush wolf, Canis latrans', + 273: 'dingo, warrigal, warragal, Canis dingo', + 274: 'dhole, Cuon alpinus', + 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus', + 276: 'hyena, hyaena', + 277: 'red fox, Vulpes vulpes', + 278: 'kit fox, Vulpes macrotis', + 279: 'Arctic fox, white fox, Alopex lagopus', + 280: 'grey fox, gray fox, Urocyon cinereoargenteus', + 281: 'tabby, tabby cat', + 282: 'tiger cat', + 283: 'Persian cat', + 284: 'Siamese cat, Siamese', + 285: 'Egyptian cat', + 286: 'cougar, puma, catamount, mountain lion, painter, ' + 'panther, Felis concolor', + 287: 'lynx, catamount', + 288: 'leopard, Panthera pardus', + 289: 'snow leopard, ounce, Panthera uncia', + 290: 'jaguar, panther, Panthera onca, Felis onca', + 291: 'lion, king of beasts, Panthera leo', + 292: 'tiger, Panthera tigris', + 293: 'cheetah, chetah, Acinonyx jubatus', + 294: 'brown bear, bruin, Ursus arctos', + 295: 'American black 
bear, black bear, Ursus americanus, ' + 'Euarctos americanus', + 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus', + 297: 'sloth bear, Melursus ursinus, Ursus ursinus', + 298: 'mongoose', + 299: 'meerkat, mierkat', + 300: 'tiger beetle', + 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle', + 302: 'ground beetle, carabid beetle', + 303: 'long-horned beetle, longicorn, longicorn beetle', + 304: 'leaf beetle, chrysomelid', + 305: 'dung beetle', + 306: 'rhinoceros beetle', + 307: 'weevil', + 308: 'fly', + 309: 'bee', + 310: 'ant, emmet, pismire', + 311: 'grasshopper, hopper', + 312: 'cricket', + 313: 'walking stick, walkingstick, stick insect', + 314: 'cockroach, roach', + 315: 'mantis, mantid', + 316: 'cicada, cicala', + 317: 'leafhopper', + 318: 'lacewing, lacewing fly', + 319: "dragonfly, darning needle, devil's darning needle, " + "sewing needle, snake feeder, snake doctor, mosquito " + "hawk, skeeter hawk", + 320: 'damselfly', + 321: 'admiral', + 322: 'ringlet, ringlet butterfly', + 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus', + 324: 'cabbage butterfly', + 325: 'sulphur butterfly, sulfur butterfly', + 326: 'lycaenid, lycaenid butterfly', + 327: 'starfish, sea star', + 328: 'sea urchin', + 329: 'sea cucumber, holothurian', + 330: 'wood rabbit, cottontail, cottontail rabbit', + 331: 'hare', + 332: 'Angora, Angora rabbit', + 333: 'hamster', + 334: 'porcupine, hedgehog', + 335: 'fox squirrel, eastern fox squirrel, Sciurus niger', + 336: 'marmot', + 337: 'beaver', + 338: 'guinea pig, Cavia cobaya', + 339: 'sorrel', + 340: 'zebra', + 341: 'hog, pig, grunter, squealer, Sus scrofa', + 342: 'wild boar, boar, Sus scrofa', + 343: 'warthog', + 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius', + 345: 'ox', + 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis', + 347: 'bison', + 348: 'ram, tup', + 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain ' + 'bighorn, Rocky Mountain sheep, Ovis canadensis', + 350: 'ibex, Capra ibex', + 351: 'hartebeest', + 352: 'impala, Aepyceros melampus', + 353: 'gazelle', + 354: 'Arabian camel, dromedary, Camelus dromedarius', + 355: 'llama', + 356: 'weasel', + 357: 'mink', + 358: 'polecat, fitch, foulmart, foumart, Mustela putorius', + 359: 'black-footed ferret, ferret, Mustela nigripes', + 360: 'otter', + 361: 'skunk, polecat, wood pussy', + 362: 'badger', + 363: 'armadillo', + 364: 'three-toed sloth, ai, Bradypus tridactylus', + 365: 'orangutan, orang, orangutang, Pongo pygmaeus', + 366: 'gorilla, Gorilla gorilla', + 367: 'chimpanzee, chimp, Pan troglodytes', + 368: 'gibbon, Hylobates lar', + 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus', + 370: 'guenon, guenon monkey', + 371: 'patas, hussar monkey, Erythrocebus patas', + 372: 'baboon', + 373: 'macaque', + 374: 'langur', + 375: 'colobus, colobus monkey', + 376: 'proboscis monkey, Nasalis larvatus', + 377: 'marmoset', + 378: 'capuchin, ringtail, Cebus capucinus', + 379: 'howler monkey, howler', + 380: 'titi, titi monkey', + 381: 'spider monkey, Ateles geoffroyi', + 382: 'squirrel monkey, Saimiri sciureus', + 383: 'Madagascar cat, ring-tailed lemur, Lemur catta', + 384: 'indri, indris, Indri indri, Indri brevicaudatus', + 385: 'Indian elephant, Elephas maximus', + 386: 'African elephant, Loxodonta africana', + 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens', + 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca', + 389: 'barracouta, snoek', + 390: 'eel', + 391: 
'coho, cohoe, coho salmon, blue jack, silver salmon, ' + 'Oncorhynchus kisutch', + 392: 'rock beauty, Holocanthus tricolor', + 393: 'anemone fish', + 394: 'sturgeon', + 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus', + 396: 'lionfish', + 397: 'puffer, pufferfish, blowfish, globefish', + 398: 'abacus', + 399: 'abaya', + 400: "academic gown, academic robe, judge's robe", + 401: 'accordion, piano accordion, squeeze box', + 402: 'acoustic guitar', + 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier', + 404: 'airliner', + 405: 'airship, dirigible', + 406: 'altar', + 407: 'ambulance', + 408: 'amphibian, amphibious vehicle', + 409: 'analog clock', + 410: 'apiary, bee house', + 411: 'apron', + 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ' + 'ash-bin, ashbin, dustbin, trash barrel, trash bin', + 413: 'assault rifle, assault gun', + 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack', + 415: 'bakery, bakeshop, bakehouse', + 416: 'balance beam, beam', + 417: 'balloon', + 418: 'ballpoint, ballpoint pen, ballpen, Biro', + 419: 'Band Aid', + 420: 'banjo', + 421: 'bannister, banister, balustrade, balusters, handrail', + 422: 'barbell', + 423: 'barber chair', + 424: 'barbershop', + 425: 'barn', + 426: 'barometer', + 427: 'barrel, cask', + 428: 'barrow, garden cart, lawn cart, wheelbarrow', + 429: 'baseball', + 430: 'basketball', + 431: 'bassinet', + 432: 'bassoon', + 433: 'bathing cap, swimming cap', + 434: 'bath towel', + 435: 'bathtub, bathing tub, bath, tub', + 436: 'beach wagon, station wagon, wagon, estate car, ' + 'beach waggon, station waggon, waggon', + 437: 'beacon, lighthouse, beacon light, pharos', + 438: 'beaker', + 439: 'bearskin, busby, shako', + 440: 'beer bottle', + 441: 'beer glass', + 442: 'bell cote, bell cot', + 443: 'bib', + 444: 'bicycle-built-for-two, tandem bicycle, tandem', + 445: 'bikini, two-piece', + 446: 'binder, ring-binder', + 447: 'binoculars, field glasses, opera glasses', + 448: 'birdhouse', + 449: 'boathouse', + 450: 'bobsled, bobsleigh, bob', + 451: 'bolo tie, bolo, bola tie, bola', + 452: 'bonnet, poke bonnet', + 453: 'bookcase', + 454: 'bookshop, bookstore, bookstall', + 455: 'bottlecap', + 456: 'bow', + 457: 'bow tie, bow-tie, bowtie', + 458: 'brass, memorial tablet, plaque', + 459: 'brassiere, bra, bandeau', + 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty', + 461: 'breastplate, aegis, egis', + 462: 'broom', + 463: 'bucket, pail', + 464: 'buckle', + 465: 'bulletproof vest', + 466: 'bullet train, bullet', + 467: 'butcher shop, meat market', + 468: 'cab, hack, taxi, taxicab', + 469: 'caldron, cauldron', + 470: 'candle, taper, wax light', + 471: 'cannon', + 472: 'canoe', + 473: 'can opener, tin opener', + 474: 'cardigan', + 475: 'car mirror', + 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig', + 477: "carpenter's kit, tool kit", + 478: 'carton', + 479: 'car wheel', + 480: 'cash machine, cash dispenser, automated teller ' + 'machine, automatic teller machine, automated teller, ' + 'automatic teller, ATM', + 481: 'cassette', + 482: 'cassette player', + 483: 'castle', + 484: 'catamaran', + 485: 'CD player', + 486: 'cello, violoncello', + 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone', + 488: 'chain', + 489: 'chainlink fence', + 490: 'chain mail, ring mail, mail, chain armor, chain ' + 'armour, ring armor, ring armour', + 491: 'chain saw, chainsaw', + 492: 'chest', + 493: 'chiffonier, commode', + 494: 'chime, bell, gong', + 495: 'china cabinet, china closet', 
+ 496: 'Christmas stocking', + 497: 'church, church building', + 498: 'cinema, movie theater, movie theatre, movie house, picture palace', + 499: 'cleaver, meat cleaver, chopper', + 500: 'cliff dwelling', + 501: 'cloak', + 502: 'clog, geta, patten, sabot', + 503: 'cocktail shaker', + 504: 'coffee mug', + 505: 'coffeepot', + 506: 'coil, spiral, volute, whorl, helix', + 507: 'combination lock', + 508: 'computer keyboard, keypad', + 509: 'confectionery, confectionary, candy store', + 510: 'container ship, containership, container vessel', + 511: 'convertible', + 512: 'corkscrew, bottle screw', + 513: 'cornet, horn, trumpet, trump', + 514: 'cowboy boot', + 515: 'cowboy hat, ten-gallon hat', + 516: 'cradle', + 517: 'crane', + 518: 'crash helmet', + 519: 'crate', + 520: 'crib, cot', + 521: 'Crock Pot', + 522: 'croquet ball', + 523: 'crutch', + 524: 'cuirass', + 525: 'dam, dike, dyke', + 526: 'desk', + 527: 'desktop computer', + 528: 'dial telephone, dial phone', + 529: 'diaper, nappy, napkin', + 530: 'digital clock', + 531: 'digital watch', + 532: 'dining table, board', + 533: 'dishrag, dishcloth', + 534: 'dishwasher, dish washer, dishwashing machine', + 535: 'disk brake, disc brake', + 536: 'dock, dockage, docking facility', + 537: 'dogsled, dog sled, dog sleigh', + 538: 'dome', + 539: 'doormat, welcome mat', + 540: 'drilling platform, offshore rig', + 541: 'drum, membranophone, tympan', + 542: 'drumstick', + 543: 'dumbbell', + 544: 'Dutch oven', + 545: 'electric fan, blower', + 546: 'electric guitar', + 547: 'electric locomotive', + 548: 'entertainment center', + 549: 'envelope', + 550: 'espresso maker', + 551: 'face powder', + 552: 'feather boa, boa', + 553: 'file, file cabinet, filing cabinet', + 554: 'fireboat', + 555: 'fire engine, fire truck', + 556: 'fire screen, fireguard', + 557: 'flagpole, flagstaff', + 558: 'flute, transverse flute', + 559: 'folding chair', + 560: 'football helmet', + 561: 'forklift', + 562: 'fountain', + 563: 'fountain pen', + 564: 'four-poster', + 565: 'freight car', + 566: 'French horn, horn', + 567: 'frying pan, frypan, skillet', + 568: 'fur coat', + 569: 'garbage truck, dustcart', + 570: 'gasmask, respirator, gas helmet', + 571: 'gas pump, gasoline pump, petrol pump, island dispenser', + 572: 'goblet', + 573: 'go-kart', + 574: 'golf ball', + 575: 'golfcart, golf cart', + 576: 'gondola', + 577: 'gong, tam-tam', + 578: 'gown', + 579: 'grand piano, grand', + 580: 'greenhouse, nursery, glasshouse', + 581: 'grille, radiator grille', + 582: 'grocery store, grocery, food market, market', + 583: 'guillotine', + 584: 'hair slide', + 585: 'hair spray', + 586: 'half track', + 587: 'hammer', + 588: 'hamper', + 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier', + 590: 'hand-held computer, hand-held microcomputer', + 591: 'handkerchief, hankie, hanky, hankey', + 592: 'hard disc, hard disk, fixed disk', + 593: 'harmonica, mouth organ, harp, mouth harp', + 594: 'harp', + 595: 'harvester, reaper', + 596: 'hatchet', + 597: 'holster', + 598: 'home theater, home theatre', + 599: 'honeycomb', + 600: 'hook, claw', + 601: 'hoopskirt, crinoline', + 602: 'horizontal bar, high bar', + 603: 'horse cart, horse-cart', + 604: 'hourglass', + 605: 'iPod', + 606: 'iron, smoothing iron', + 607: "jack-o'-lantern", + 608: 'jean, blue jean, denim', + 609: 'jeep, landrover', + 610: 'jersey, T-shirt, tee shirt', + 611: 'jigsaw puzzle', + 612: 'jinrikisha, ricksha, rickshaw', + 613: 'joystick', + 614: 'kimono', + 615: 'knee pad', + 616: 'knot', + 617: 'lab coat, laboratory coat', + 
618: 'ladle', + 619: 'lampshade, lamp shade', + 620: 'laptop, laptop computer', + 621: 'lawn mower, mower', + 622: 'lens cap, lens cover', + 623: 'letter opener, paper knife, paperknife', + 624: 'library', + 625: 'lifeboat', + 626: 'lighter, light, igniter, ignitor', + 627: 'limousine, limo', + 628: 'liner, ocean liner', + 629: 'lipstick, lip rouge', + 630: 'Loafer', + 631: 'lotion', + 632: 'loudspeaker, speaker, speaker unit, loudspeaker ' + 'system, speaker system', + 633: "loupe, jeweler's loupe", + 634: 'lumbermill, sawmill', + 635: 'magnetic compass', + 636: 'mailbag, postbag', + 637: 'mailbox, letter box', + 638: 'maillot', + 639: 'maillot, tank suit', + 640: 'manhole cover', + 641: 'maraca', + 642: 'marimba, xylophone', + 643: 'mask', + 644: 'matchstick', + 645: 'maypole', + 646: 'maze, labyrinth', + 647: 'measuring cup', + 648: 'medicine chest, medicine cabinet', + 649: 'megalith, megalithic structure', + 650: 'microphone, mike', + 651: 'microwave, microwave oven', + 652: 'military uniform', + 653: 'milk can', + 654: 'minibus', + 655: 'miniskirt, mini', + 656: 'minivan', + 657: 'missile', + 658: 'mitten', + 659: 'mixing bowl', + 660: 'mobile home, manufactured home', + 661: 'Model T', + 662: 'modem', + 663: 'monastery', + 664: 'monitor', + 665: 'moped', + 666: 'mortar', + 667: 'mortarboard', + 668: 'mosque', + 669: 'mosquito net', + 670: 'motor scooter, scooter', + 671: 'mountain bike, all-terrain bike, off-roader', + 672: 'mountain tent', + 673: 'mouse, computer mouse', + 674: 'mousetrap', + 675: 'moving van', + 676: 'muzzle', + 677: 'nail', + 678: 'neck brace', + 679: 'necklace', + 680: 'nipple', + 681: 'notebook, notebook computer', + 682: 'obelisk', + 683: 'oboe, hautboy, hautbois', + 684: 'ocarina, sweet potato', + 685: 'odometer, hodometer, mileometer, milometer', + 686: 'oil filter', + 687: 'organ, pipe organ', + 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO', + 689: 'overskirt', + 690: 'oxcart', + 691: 'oxygen mask', + 692: 'packet', + 693: 'paddle, boat paddle', + 694: 'paddlewheel, paddle wheel', + 695: 'padlock', + 696: 'paintbrush', + 697: "pajama, pyjama, pj's, jammies", + 698: 'palace', + 699: 'panpipe, pandean pipe, syrinx', + 700: 'paper towel', + 701: 'parachute, chute', + 702: 'parallel bars, bars', + 703: 'park bench', + 704: 'parking meter', + 705: 'passenger car, coach, carriage', + 706: 'patio, terrace', + 707: 'pay-phone, pay-station', + 708: 'pedestal, plinth, footstall', + 709: 'pencil box, pencil case', + 710: 'pencil sharpener', + 711: 'perfume, essence', + 712: 'Petri dish', + 713: 'photocopier', + 714: 'pick, plectrum, plectron', + 715: 'pickelhaube', + 716: 'picket fence, paling', + 717: 'pickup, pickup truck', + 718: 'pier', + 719: 'piggy bank, penny bank', + 720: 'pill bottle', + 721: 'pillow', + 722: 'ping-pong ball', + 723: 'pinwheel', + 724: 'pirate, pirate ship', + 725: 'pitcher, ewer', + 726: "plane, carpenter's plane, woodworking plane", + 727: 'planetarium', + 728: 'plastic bag', + 729: 'plate rack', + 730: 'plow, plough', + 731: "plunger, plumber's helper", + 732: 'Polaroid camera, Polaroid Land camera', + 733: 'pole', + 734: 'police van, police wagon, paddy wagon, patrol wagon, ' + 'wagon, black Maria', + 735: 'poncho', + 736: 'pool table, billiard table, snooker table', + 737: 'pop bottle, soda bottle', + 738: 'pot, flowerpot', + 739: "potter's wheel", + 740: 'power drill', + 741: 'prayer rug, prayer mat', + 742: 'printer', + 743: 'prison, prison house', + 744: 'projectile, missile', + 745: 'projector', + 746: 'puck, hockey 
puck', + 747: 'punching bag, punch bag, punching ball, punchball', + 748: 'purse', + 749: 'quill, quill pen', + 750: 'quilt, comforter, comfort, puff', + 751: 'racer, race car, racing car', + 752: 'racket, racquet', + 753: 'radiator', + 754: 'radio, wireless', + 755: 'radio telescope, radio reflector', + 756: 'rain barrel', + 757: 'recreational vehicle, RV, R.V.', + 758: 'reel', + 759: 'reflex camera', + 760: 'refrigerator, icebox', + 761: 'remote control, remote', + 762: 'restaurant, eating house, eating place, eatery', + 763: 'revolver, six-gun, six-shooter', + 764: 'rifle', + 765: 'rocking chair, rocker', + 766: 'rotisserie', + 767: 'rubber eraser, rubber, pencil eraser', + 768: 'rugby ball', + 769: 'rule, ruler', + 770: 'running shoe', + 771: 'safe', + 772: 'safety pin', + 773: 'saltshaker, salt shaker', + 774: 'sandal', + 775: 'sarong', + 776: 'sax, saxophone', + 777: 'scabbard', + 778: 'scale, weighing machine', + 779: 'school bus', + 780: 'schooner', + 781: 'scoreboard', + 782: 'screen, CRT screen', + 783: 'screw', + 784: 'screwdriver', + 785: 'seat belt, seatbelt', + 786: 'sewing machine', + 787: 'shield, buckler', + 788: 'shoe shop, shoe-shop, shoe store', + 789: 'shoji', + 790: 'shopping basket', + 791: 'shopping cart', + 792: 'shovel', + 793: 'shower cap', + 794: 'shower curtain', + 795: 'ski', + 796: 'ski mask', + 797: 'sleeping bag', + 798: 'slide rule, slipstick', + 799: 'sliding door', + 800: 'slot, one-armed bandit', + 801: 'snorkel', + 802: 'snowmobile', + 803: 'snowplow, snowplough', + 804: 'soap dispenser', + 805: 'soccer ball', + 806: 'sock', + 807: 'solar dish, solar collector, solar furnace', + 808: 'sombrero', + 809: 'soup bowl', + 810: 'space bar', + 811: 'space heater', + 812: 'space shuttle', + 813: 'spatula', + 814: 'speedboat', + 815: "spider web, spider's web", + 816: 'spindle', + 817: 'sports car, sport car', + 818: 'spotlight, spot', + 819: 'stage', + 820: 'steam locomotive', + 821: 'steel arch bridge', + 822: 'steel drum', + 823: 'stethoscope', + 824: 'stole', + 825: 'stone wall', + 826: 'stopwatch, stop watch', + 827: 'stove', + 828: 'strainer', + 829: 'streetcar, tram, tramcar, trolley, trolley car', + 830: 'stretcher', + 831: 'studio couch, day bed', + 832: 'stupa, tope', + 833: 'submarine, pigboat, sub, U-boat', + 834: 'suit, suit of clothes', + 835: 'sundial', + 836: 'sunglass', + 837: 'sunglasses, dark glasses, shades', + 838: 'sunscreen, sunblock, sun blocker', + 839: 'suspension bridge', + 840: 'swab, swob, mop', + 841: 'sweatshirt', + 842: 'swimming trunks, bathing trunks', + 843: 'swing', + 844: 'switch, electric switch, electrical switch', + 845: 'syringe', + 846: 'table lamp', + 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle', + 848: 'tape player', + 849: 'teapot', + 850: 'teddy, teddy bear', + 851: 'television, television system', + 852: 'tennis ball', + 853: 'thatch, thatched roof', + 854: 'theater curtain, theatre curtain', + 855: 'thimble', + 856: 'thresher, thrasher, threshing machine', + 857: 'throne', + 858: 'tile roof', + 859: 'toaster', + 860: 'tobacco shop, tobacconist shop, tobacconist', + 861: 'toilet seat', + 862: 'torch', + 863: 'totem pole', + 864: 'tow truck, tow car, wrecker', + 865: 'toyshop', + 866: 'tractor', + 867: 'trailer truck, tractor trailer, trucking rig, rig, ' + 'articulated lorry, semi', + 868: 'tray', + 869: 'trench coat', + 870: 'tricycle, trike, velocipede', + 871: 'trimaran', + 872: 'tripod', + 873: 'triumphal arch', + 874: 'trolleybus, trolley coach, trackless trolley', + 875: 'trombone', 
+ 876: 'tub, vat', + 877: 'turnstile', + 878: 'typewriter keyboard', + 879: 'umbrella', + 880: 'unicycle, monocycle', + 881: 'upright, upright piano', + 882: 'vacuum, vacuum cleaner', + 883: 'vase', + 884: 'vault', + 885: 'velvet', + 886: 'vending machine', + 887: 'vestment', + 888: 'viaduct', + 889: 'violin, fiddle', + 890: 'volleyball', + 891: 'waffle iron', + 892: 'wall clock', + 893: 'wallet, billfold, notecase, pocketbook', + 894: 'wardrobe, closet, press', + 895: 'warplane, military plane', + 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin', + 897: 'washer, automatic washer, washing machine', + 898: 'water bottle', + 899: 'water jug', + 900: 'water tower', + 901: 'whiskey jug', + 902: 'whistle', + 903: 'wig', + 904: 'window screen', + 905: 'window shade', + 906: 'Windsor tie', + 907: 'wine bottle', + 908: 'wing', + 909: 'wok', + 910: 'wooden spoon', + 911: 'wool, woolen, woollen', + 912: 'worm fence, snake fence, snake-rail fence, Virginia fence', + 913: 'wreck', + 914: 'yawl', + 915: 'yurt', + 916: 'web site, website, internet site, site', + 917: 'comic book', + 918: 'crossword puzzle, crossword', + 919: 'street sign', + 920: 'traffic light, traffic signal, stoplight', + 921: 'book jacket, dust cover, dust jacket, dust wrapper', + 922: 'menu', + 923: 'plate', + 924: 'guacamole', + 925: 'consomme', + 926: 'hot pot, hotpot', + 927: 'trifle', + 928: 'ice cream, icecream', + 929: 'ice lolly, lolly, lollipop, popsicle', + 930: 'French loaf', + 931: 'bagel, beigel', + 932: 'pretzel', + 933: 'cheeseburger', + 934: 'hotdog, hot dog, red hot', + 935: 'mashed potato', + 936: 'head cabbage', + 937: 'broccoli', + 938: 'cauliflower', + 939: 'zucchini, courgette', + 940: 'spaghetti squash', + 941: 'acorn squash', + 942: 'butternut squash', + 943: 'cucumber, cuke', + 944: 'artichoke, globe artichoke', + 945: 'bell pepper', + 946: 'cardoon', + 947: 'mushroom', + 948: 'Granny Smith', + 949: 'strawberry', + 950: 'orange', + 951: 'lemon', + 952: 'fig', + 953: 'pineapple, ananas', + 954: 'banana', + 955: 'jackfruit, jak, jack', + 956: 'custard apple', + 957: 'pomegranate', + 958: 'hay', + 959: 'carbonara', + 960: 'chocolate sauce, chocolate syrup', + 961: 'dough', + 962: 'meat loaf, meatloaf', + 963: 'pizza, pizza pie', + 964: 'potpie', + 965: 'burrito', + 966: 'red wine', + 967: 'espresso', + 968: 'cup', + 969: 'eggnog', + 970: 'alp', + 971: 'bubble', + 972: 'cliff, drop, drop-off', + 973: 'coral reef', + 974: 'geyser', + 975: 'lakeside, lakeshore', + 976: 'promontory, headland, head, foreland', + 977: 'sandbar, sand bar', + 978: 'seashore, coast, seacoast, sea-coast', + 979: 'valley, vale', + 980: 'volcano', + 981: 'ballplayer, baseball player', + 982: 'groom, bridegroom', + 983: 'scuba diver', + 984: 'rapeseed', + 985: 'daisy', + 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium " + "calceolus, Cypripedium parviflorum", + 987: 'corn', + 988: 'acorn', + 989: 'hip, rose hip, rosehip', + 990: 'buckeye, horse chestnut, conker', + 991: 'coral fungus', + 992: 'agaric', + 993: 'gyromitra', + 994: 'stinkhorn, carrion fungus', + 995: 'earthstar', + 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, ' + 'Grifola frondosa', + 997: 'bolete', + 998: 'ear, spike, capitulum', + 999: 'toilet tissue, toilet paper, bathroom tissue'} From 4368b3de13a4cd3f198e1ff718eb1987265502fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Fri, 4 Sep 2020 00:00:48 +0200 Subject: [PATCH 08/17] Upgrade version number to 1.7.1 (#529) MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit * upgrade version to 1.7.1 Signed-off-by: xavier dupré --- skl2onnx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py index a02135946..ca38fd836 100644 --- a/skl2onnx/__init__.py +++ b/skl2onnx/__init__.py @@ -7,7 +7,7 @@ """ Main entry point to the converter from the *scikit-learn* to *onnx*. """ -__version__ = "1.7.0" +__version__ = "1.7.1" __author__ = "Microsoft" __producer__ = "skl2onnx" __producer_version__ = __version__ From 50be8b5bb7d35f73155ea111532496325837f0c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 30 Sep 2020 09:38:49 +0200 Subject: [PATCH 09/17] Fixes nightly build 9/29 (#541) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix ci build * Update test_sklearn_tfidf_vectorizer_converter_pipeline.py Signed-off-by: xavier dupré --- tests/test_sklearn_nearest_neighbour_converter.py | 3 +++ tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index 2af29f9ac..4b5d431b6 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -121,6 +121,9 @@ def test_model_knn_regressor(self): @unittest.skipIf( StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), reason="not available") + @unittest.skipIf( + StrictVersion(onnx.__version__) <= StrictVersion("1.6.0"), + reason="not available") def test_model_knn_regressor_radius(self): model, X = self._fit_model(RadiusNeighborsRegressor()) model_onnx = convert_sklearn(model, "KNN regressor", diff --git a/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py b/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py index 6863539d2..30be111b0 100644 --- a/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py +++ b/tests/test_sklearn_tfidf_vectorizer_converter_pipeline.py @@ -20,8 +20,8 @@ class TestSklearnTfidfVectorizerPipeline(unittest.TestCase): def common_test_model_tfidf_vectorizer_pipeline_cls( self, kind=None, verbose=False): if kind == 'stop': - if ort_version.startswith('1.4'): - # regression with stopwords in onnxruntime 1.4 + if ort_version.startswith('1.4') or ort_version.startswith('1.5'): + # regression with stopwords in onnxruntime 1.4, 1.5 stopwords = ['theh'] else: stopwords = ['the', 'and', 'is'] From 18f710d0968b4fab0b09f95fa32c9164c5e63bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Tue, 13 Oct 2020 20:03:13 +0200 Subject: [PATCH 10/17] Fixes nightly build 10/12 (#543) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- tests/test_sklearn_nearest_neighbour_converter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index 4b5d431b6..8eb2983fa 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -250,6 +250,9 @@ def test_model_knn_regressor2_1(self): @unittest.skipIf( StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), reason="not available") + @unittest.skipIf( + StrictVersion(onnx.__version__) <= StrictVersion("1.6.0"), + reason="fails for earlier version of onnx (NaN)") def 
test_model_knn_regressor2_1_radius(self): model, X = self._fit_model_simple( RadiusNeighborsRegressor(algorithm="brute"), From eae9a0595287483c130a98d9595ba42bdeebd390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Fri, 6 Nov 2020 10:54:11 +0100 Subject: [PATCH 11/17] Implements option zipmap='columns' to split output of a classifier into multiple vectors (#550) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implements option zipmap='columns' to split output of a classifier into multiple vectors * Update test_sklearn_glm_classifier_converter.py * Fix issue with reserve_name, returns the key and not the value Signed-off-by: xavier dupré --- docs/examples/plot_convert_zipmap.py | 30 +++++++++-- docs/examples/plot_pipeline_lightgbm.py | 2 +- docs/examples/plot_pipeline_xgboost.py | 2 +- docs/tutorial/plot_gexternal_lightgbm.py | 2 +- docs/tutorial/plot_gexternal_xgboost.py | 2 +- skl2onnx/_parse.py | 32 ++++++++---- skl2onnx/_supported_operators.py | 2 +- skl2onnx/common/_topology.py | 2 +- skl2onnx/common/utils_classifier.py | 18 ++++++- skl2onnx/operator_converters/ada_boost.py | 2 +- skl2onnx/operator_converters/bagging.py | 2 +- .../calibrated_classifier_cv.py | 2 +- skl2onnx/operator_converters/decision_tree.py | 4 +- .../operator_converters/gradient_boosting.py | 2 +- .../operator_converters/linear_classifier.py | 2 +- .../multilayer_perceptron.py | 2 +- skl2onnx/operator_converters/naive_bayes.py | 10 ++-- .../operator_converters/nearest_neighbours.py | 4 +- .../one_vs_rest_classifier.py | 2 +- skl2onnx/operator_converters/random_forest.py | 8 +-- .../operator_converters/sgd_classifier.py | 2 +- skl2onnx/operator_converters/stacking.py | 2 +- .../support_vector_machines.py | 2 +- .../operator_converters/voting_classifier.py | 2 +- skl2onnx/operator_converters/zip_map.py | 26 ++++++++-- skl2onnx/shape_calculators/zip_map.py | 8 +++ .../test_sklearn_glm_classifier_converter.py | 51 +++++++++++++++++++ tests/test_utils/tests_helper.py | 8 +-- tests_onnxmltools/test_lightgbm.py | 2 +- tests_onnxmltools/test_xgboost_converters.py | 6 ++- 30 files changed, 187 insertions(+), 54 deletions(-) diff --git a/docs/examples/plot_convert_zipmap.py b/docs/examples/plot_convert_zipmap.py index 1542248ea..b0fa47c0c 100644 --- a/docs/examples/plot_convert_zipmap.py +++ b/docs/examples/plot_convert_zipmap.py @@ -75,6 +75,25 @@ print("probabilities type:", type(res2[1])) print("type for the first observations:", type(res2[1][0])) +################################### +# One output per class +# ++++++++++++++++++++ +# +# This options removes the final operator ZipMap and splits +# the probabilities into columns. The final model produces +# one output for the label, and one output per class. 
+ +options = {id(clr): {'zipmap': 'columns'}} +onx3 = convert_sklearn(clr, initial_types=initial_type, options=options, + target_opset=12) + +sess3 = rt.InferenceSession(onx3.SerializeToString()) +res3 = sess3.run(None, {'float_input': X_test.astype(numpy.float32)}) +for i, out in enumerate(sess3.get_outputs()): + print("output: '{}' shape={} values={}...".format( + out.name, res3[i].shape, res3[i][:2])) + + ################################### # Let's compare prediction time # +++++++++++++++++++++++++++++ @@ -83,13 +102,18 @@ print("Time with ZipMap:") print(repeat(lambda: sess.run(None, {'float_input': X32}), - number=100, repeat=3)) + number=100, repeat=10)) print("Time without ZipMap:") print(repeat(lambda: sess2.run(None, {'float_input': X32}), - number=100, repeat=3)) + number=100, repeat=10)) + +print("Time without ZipMap but with columns:") +print(repeat(lambda: sess3.run(None, {'float_input': X32}), + number=100, repeat=10)) -# The prediction is much faster on this example. +# The prediction is much faster without ZipMap +# on this example. # The optimisation is even faster when the classes # are described with strings and not integers # as the final result (list of dictionaries) may copy diff --git a/docs/examples/plot_pipeline_lightgbm.py b/docs/examples/plot_pipeline_lightgbm.py index ead65ddd6..8083ad7b1 100644 --- a/docs/examples/plot_pipeline_lightgbm.py +++ b/docs/examples/plot_pipeline_lightgbm.py @@ -81,7 +81,7 @@ update_registered_converter( LGBMClassifier, 'LightGbmLGBMClassifier', calculate_linear_classifier_output_shapes, convert_lightgbm, - options={'nocl': [True, False], 'zipmap': [True, False]}) + options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) ################################## # Convert again diff --git a/docs/examples/plot_pipeline_xgboost.py b/docs/examples/plot_pipeline_xgboost.py index a299efead..7e0ff7c9e 100644 --- a/docs/examples/plot_pipeline_xgboost.py +++ b/docs/examples/plot_pipeline_xgboost.py @@ -97,7 +97,7 @@ update_registered_converter( XGBClassifier, 'XGBoostXGBClassifier', calculate_linear_classifier_output_shapes, convert_xgboost, - options={'nocl': [True, False], 'zipmap': [True, False]}) + options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) ################################## # Convert again diff --git a/docs/tutorial/plot_gexternal_lightgbm.py b/docs/tutorial/plot_gexternal_lightgbm.py index 6fc388df6..b999818e2 100644 --- a/docs/tutorial/plot_gexternal_lightgbm.py +++ b/docs/tutorial/plot_gexternal_lightgbm.py @@ -64,7 +64,7 @@ update_registered_converter( LGBMClassifier, 'LightGbmLGBMClassifier', calculate_linear_classifier_output_shapes, convert_lightgbm, - options={'nocl': [True, False], 'zipmap': [True, False]}) + options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) ################################## # Convert again diff --git a/docs/tutorial/plot_gexternal_xgboost.py b/docs/tutorial/plot_gexternal_xgboost.py index 63e1a2c80..23a01592b 100644 --- a/docs/tutorial/plot_gexternal_xgboost.py +++ b/docs/tutorial/plot_gexternal_xgboost.py @@ -88,7 +88,7 @@ update_registered_converter( XGBClassifier, 'XGBoostXGBClassifier', calculate_linear_classifier_output_shapes, convert_xgboost, - options={'nocl': [True, False], 'zipmap': [True, False]}) + options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}) ################################## # Convert again diff --git a/skl2onnx/_parse.py b/skl2onnx/_parse.py index 9167198af..48a569ad3 100644 --- a/skl2onnx/_parse.py +++ 
b/skl2onnx/_parse.py @@ -338,12 +338,19 @@ def _parse_sklearn_classifier(scope, model, inputs, custom_parsers=None): if model.__class__ in [NuSVC, SVC] and not model.probability: return probability_tensor options = scope.get_options(model, dict(zipmap=True)) - if not options['zipmap']: + if isinstance(options['zipmap'], bool) and not options['zipmap']: return probability_tensor - this_operator = scope.declare_local_operator('SklearnZipMap') + + if options['zipmap'] == 'columns': + this_operator = scope.declare_local_operator('SklearnZipMapColumns') + classes = get_label_classes(scope, model) + classes_names = get_label_classes(scope, model, node_names=True) + else: + this_operator = scope.declare_local_operator('SklearnZipMap') + classes = get_label_classes(scope, model) + this_operator.inputs = probability_tensor label_type = Int64TensorType([None]) - classes = get_label_classes(scope, model) if (isinstance(model.classes_, list) and isinstance(model.classes_[0], np.ndarray)): @@ -367,13 +374,20 @@ def _parse_sklearn_classifier(scope, model, inputs, custom_parsers=None): label_type = StringTensorType([None]) output_label = scope.declare_local_variable('output_label', label_type) - output_probability = scope.declare_local_variable( - 'output_probability', - SequenceType( - DictionaryType( - label_type, guess_tensor_type(inputs[0].type)))) this_operator.outputs.append(output_label) - this_operator.outputs.append(output_probability) + + if options['zipmap'] == 'columns': + prob_type = probability_tensor[1].type + for cl in classes_names: + output_cl = scope.declare_local_variable(cl, prob_type.__class__()) + this_operator.outputs.append(output_cl) + else: + output_probability = scope.declare_local_variable( + 'output_probability', + SequenceType( + DictionaryType( + label_type, guess_tensor_type(inputs[0].type)))) + this_operator.outputs.append(output_probability) return this_operator.outputs diff --git a/skl2onnx/_supported_operators.py b/skl2onnx/_supported_operators.py index 100bb306b..581290800 100644 --- a/skl2onnx/_supported_operators.py +++ b/skl2onnx/_supported_operators.py @@ -413,7 +413,7 @@ def update_registered_converter(model, alias, shape_fct, convert_fct, update_registered_converter(SGDClassifier, 'SklearnLinearClassifier', calculate_linear_classifier_output_shapes, convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'raw_scores': [True, False]}) """ # noqa if (not overwrite and model in sklearn_operator_name_map diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py index ec14766c2..3009758ce 100644 --- a/skl2onnx/common/_topology.py +++ b/skl2onnx/common/_topology.py @@ -357,7 +357,7 @@ def reserve_name(self, raw_name): raise RuntimeError( "Name '{}' already reserved.".format(raw_name)) self.reserved[raw_name] = self.get_unique_variable_name(raw_name) - return self.reserved[raw_name] + return raw_name def unreserve_name(self, name): """ diff --git a/skl2onnx/common/utils_classifier.py b/skl2onnx/common/utils_classifier.py index 54e557a68..de807fe49 100644 --- a/skl2onnx/common/utils_classifier.py +++ b/skl2onnx/common/utils_classifier.py @@ -9,10 +9,10 @@ from ..proto import onnx_proto -def get_label_classes(scope, op): +def get_label_classes(scope, op, node_names=False): """ Extracts the model classes, - handles option ``nocl``. 
+ handles option ``nocl`` and ``zipmap=='columns'`` """ options = scope.get_options(op, dict(nocl=False)) if options['nocl']: @@ -21,6 +21,20 @@ def get_label_classes(scope, op): "Options 'nocl=True' is not implemented for multi-label " "classification (class: {}).".format(op.__class__.__name__)) classes = np.arange(0, len(op.classes_)) + elif node_names: + try: + options = scope.get_options(op, dict(zipmap=False)) + zipcol = options['zipmap'] == 'columns' + except NameError: + zipcol = False + if zipcol: + clnames = op.classes_.ravel() + if np.issubdtype(clnames.dtype, np.integer): + classes = np.array(['i%d' % c for c in clnames]) + else: + classes = np.array(['s%s' % c for c in clnames]) + else: + classes = op.classes_ else: classes = op.classes_ return classes diff --git a/skl2onnx/operator_converters/ada_boost.py b/skl2onnx/operator_converters/ada_boost.py index 58ee0902f..a34b266fb 100644 --- a/skl2onnx/operator_converters/ada_boost.py +++ b/skl2onnx/operator_converters/ada_boost.py @@ -566,7 +566,7 @@ def convert_sklearn_ada_boost_regressor(scope, operator, container): register_converter('SklearnAdaBoostClassifier', convert_sklearn_ada_boost_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False]}) register_converter('SklearnAdaBoostRegressor', diff --git a/skl2onnx/operator_converters/bagging.py b/skl2onnx/operator_converters/bagging.py index c73306cae..fb1539ecf 100644 --- a/skl2onnx/operator_converters/bagging.py +++ b/skl2onnx/operator_converters/bagging.py @@ -195,7 +195,7 @@ def convert_sklearn_bagging_regressor(scope, operator, container): register_converter('SklearnBaggingClassifier', convert_sklearn_bagging_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False]}) register_converter('SklearnBaggingRegressor', diff --git a/skl2onnx/operator_converters/calibrated_classifier_cv.py b/skl2onnx/operator_converters/calibrated_classifier_cv.py index ea0a654f9..492f37370 100644 --- a/skl2onnx/operator_converters/calibrated_classifier_cv.py +++ b/skl2onnx/operator_converters/calibrated_classifier_cv.py @@ -446,5 +446,5 @@ def convert_sklearn_calibrated_classifier_cv(scope, operator, container): register_converter('SklearnCalibratedClassifierCV', convert_sklearn_calibrated_classifier_cv, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) diff --git a/skl2onnx/operator_converters/decision_tree.py b/skl2onnx/operator_converters/decision_tree.py index 00ef467ff..3ae11b56f 100644 --- a/skl2onnx/operator_converters/decision_tree.py +++ b/skl2onnx/operator_converters/decision_tree.py @@ -387,7 +387,7 @@ def _recursive_build_labels(index, current): register_converter('SklearnDecisionTreeClassifier', convert_sklearn_decision_tree_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'decision_path': [True, False]}) register_converter('SklearnDecisionTreeRegressor', @@ -395,7 +395,7 @@ def _recursive_build_labels(index, current): options={'decision_path': [True, False]}) register_converter('SklearnExtraTreeClassifier', convert_sklearn_decision_tree_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'decision_path': [True, False]}) register_converter('SklearnExtraTreeRegressor', diff --git 
a/skl2onnx/operator_converters/gradient_boosting.py b/skl2onnx/operator_converters/gradient_boosting.py index ebec04edc..35ed02760 100644 --- a/skl2onnx/operator_converters/gradient_boosting.py +++ b/skl2onnx/operator_converters/gradient_boosting.py @@ -164,7 +164,7 @@ def convert_sklearn_gradient_boosting_regressor( register_converter('SklearnGradientBoostingClassifier', convert_sklearn_gradient_boosting_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'raw_scores': [True, False], 'nocl': [True, False]}) register_converter('SklearnGradientBoostingRegressor', diff --git a/skl2onnx/operator_converters/linear_classifier.py b/skl2onnx/operator_converters/linear_classifier.py index 958a67754..f67ef4ad8 100644 --- a/skl2onnx/operator_converters/linear_classifier.py +++ b/skl2onnx/operator_converters/linear_classifier.py @@ -166,7 +166,7 @@ def convert_sklearn_linear_classifier(scope, operator, container): register_converter('SklearnLinearClassifier', convert_sklearn_linear_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False]}) register_converter('SklearnLinearSVC', convert_sklearn_linear_classifier, diff --git a/skl2onnx/operator_converters/multilayer_perceptron.py b/skl2onnx/operator_converters/multilayer_perceptron.py index 8f0f8f76f..525a272ea 100644 --- a/skl2onnx/operator_converters/multilayer_perceptron.py +++ b/skl2onnx/operator_converters/multilayer_perceptron.py @@ -186,7 +186,7 @@ def convert_sklearn_mlp_regressor(scope, operator, container): register_converter('SklearnMLPClassifier', convert_sklearn_mlp_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) register_converter('SklearnMLPRegressor', convert_sklearn_mlp_regressor) diff --git a/skl2onnx/operator_converters/naive_bayes.py b/skl2onnx/operator_converters/naive_bayes.py index 47470fe98..1d2a3a844 100644 --- a/skl2onnx/operator_converters/naive_bayes.py +++ b/skl2onnx/operator_converters/naive_bayes.py @@ -474,17 +474,17 @@ def convert_sklearn_naive_bayes(scope, operator, container): register_converter('SklearnBernoulliNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) register_converter('SklearnCategoricalNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) register_converter('SklearnComplementNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) register_converter('SklearnGaussianNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) register_converter('SklearnMultinomialNB', convert_sklearn_naive_bayes, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) diff --git a/skl2onnx/operator_converters/nearest_neighbours.py b/skl2onnx/operator_converters/nearest_neighbours.py index 0a6c387a1..5416f9dea 100644 --- a/skl2onnx/operator_converters/nearest_neighbours.py +++ b/skl2onnx/operator_converters/nearest_neighbours.py @@ -732,13 +732,13 @@ def convert_nca(scope, operator, container): register_converter( 'SklearnKNeighborsClassifier', convert_nearest_neighbors_classifier, - options={'zipmap': [True, False], + options={'zipmap': 
[True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False], 'optim': [None, 'cdist']}) register_converter( 'SklearnRadiusNeighborsClassifier', convert_nearest_neighbors_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False], 'optim': [None, 'cdist']}) diff --git a/skl2onnx/operator_converters/one_vs_rest_classifier.py b/skl2onnx/operator_converters/one_vs_rest_classifier.py index f51a6d04d..adb93797e 100644 --- a/skl2onnx/operator_converters/one_vs_rest_classifier.py +++ b/skl2onnx/operator_converters/one_vs_rest_classifier.py @@ -132,6 +132,6 @@ def convert_one_vs_rest_classifier(scope, operator, container): register_converter('SklearnOneVsRestClassifier', convert_one_vs_rest_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False]}) diff --git a/skl2onnx/operator_converters/random_forest.py b/skl2onnx/operator_converters/random_forest.py index 6bd2bddac..516fca7dc 100644 --- a/skl2onnx/operator_converters/random_forest.py +++ b/skl2onnx/operator_converters/random_forest.py @@ -445,7 +445,7 @@ def convert_sklearn_random_forest_regressor_converter( register_converter('SklearnRandomForestClassifier', convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'raw_scores': [True, False], 'nocl': [True, False], 'decision_path': [True, False]}) @@ -454,7 +454,7 @@ def convert_sklearn_random_forest_regressor_converter( options={'decision_path': [True, False]}) register_converter('SklearnExtraTreesClassifier', convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'raw_scores': [True, False], 'nocl': [True, False], 'decision_path': [True, False]}) @@ -463,11 +463,11 @@ def convert_sklearn_random_forest_regressor_converter( options={'decision_path': [True, False]}) register_converter('SklearnHistGradientBoostingClassifier', convert_sklearn_random_forest_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'raw_scores': [True, False], 'nocl': [True, False]}) register_converter('SklearnHistGradientBoostingRegressor', convert_sklearn_random_forest_regressor_converter, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'raw_scores': [True, False], 'nocl': [True, False]}) diff --git a/skl2onnx/operator_converters/sgd_classifier.py b/skl2onnx/operator_converters/sgd_classifier.py index 75144b52f..41b839464 100644 --- a/skl2onnx/operator_converters/sgd_classifier.py +++ b/skl2onnx/operator_converters/sgd_classifier.py @@ -243,6 +243,6 @@ def convert_sklearn_sgd_classifier(scope, operator, container): register_converter('SklearnSGDClassifier', convert_sklearn_sgd_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 'raw_scores': [True, False]}) diff --git a/skl2onnx/operator_converters/stacking.py b/skl2onnx/operator_converters/stacking.py index d5c5176c6..dfc34eaab 100644 --- a/skl2onnx/operator_converters/stacking.py +++ b/skl2onnx/operator_converters/stacking.py @@ -156,7 +156,7 @@ def convert_sklearn_stacking_regressor(scope, operator, container): register_converter('SklearnStackingClassifier', convert_sklearn_stacking_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False], 
'raw_scores': [True, False]}) register_converter('SklearnStackingRegressor', diff --git a/skl2onnx/operator_converters/support_vector_machines.py b/skl2onnx/operator_converters/support_vector_machines.py index 3c1b797fa..f9cb11e8d 100644 --- a/skl2onnx/operator_converters/support_vector_machines.py +++ b/skl2onnx/operator_converters/support_vector_machines.py @@ -354,6 +354,6 @@ def convert_sklearn_svm_classifier( register_converter('SklearnOneClassSVM', convert_sklearn_svm_regressor) register_converter('SklearnSVC', convert_sklearn_svm_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) register_converter('SklearnSVR', convert_sklearn_svm_regressor) diff --git a/skl2onnx/operator_converters/voting_classifier.py b/skl2onnx/operator_converters/voting_classifier.py index 0a0935cdf..741bc64f2 100644 --- a/skl2onnx/operator_converters/voting_classifier.py +++ b/skl2onnx/operator_converters/voting_classifier.py @@ -127,5 +127,5 @@ def convert_voting_classifier(scope, operator, container): register_converter('SklearnVotingClassifier', convert_voting_classifier, - options={'zipmap': [True, False], + options={'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) diff --git a/skl2onnx/operator_converters/zip_map.py b/skl2onnx/operator_converters/zip_map.py index 0f7c36b3b..979829569 100644 --- a/skl2onnx/operator_converters/zip_map.py +++ b/skl2onnx/operator_converters/zip_map.py @@ -3,13 +3,13 @@ # Licensed under the MIT License. See License.txt in the project root for # license information. # -------------------------------------------------------------------------- - from ..proto import onnx_proto -from ..common._apply_operation import apply_cast, apply_identity +from ..common._apply_operation import ( + apply_slice, apply_cast, apply_identity, apply_reshape) from ..common._registration import register_converter -def convert_sklearn_zipmap(scope, operator, container): +def _common_convert_sklearn_zipmap(scope, operator, container): zipmap_attrs = {'name': scope.get_unique_operator_name('ZipMap')} to_type = onnx_proto.TensorProto.INT64 @@ -25,9 +25,29 @@ def convert_sklearn_zipmap(scope, operator, container): else: apply_cast(scope, operator.inputs[0].full_name, operator.outputs[0].full_name, container, to=to_type) + return zipmap_attrs + + +def convert_sklearn_zipmap(scope, operator, container): + zipmap_attrs = _common_convert_sklearn_zipmap(scope, operator, container) container.add_node('ZipMap', operator.inputs[1].full_name, operator.outputs[1].full_name, op_domain='ai.onnx.ml', **zipmap_attrs) +def convert_sklearn_zipmap_columns(scope, operator, container): + _common_convert_sklearn_zipmap(scope, operator, container) + probs = operator.inputs[1].full_name + for i in range(1, len(operator.outputs)): + out = operator.outputs[i].full_name + flat = scope.get_unique_variable_name(out) + apply_slice( + scope, probs, flat, container, starts=[i-1], ends=[i], axes=[1], + operator_name=scope.get_unique_operator_name('Slice')) + apply_reshape( + scope, flat, out, container, desired_shape=(-1, ), + operator_name=scope.get_unique_operator_name('reshape')) + + register_converter('SklearnZipMap', convert_sklearn_zipmap) +register_converter('SklearnZipMapColumns', convert_sklearn_zipmap_columns) diff --git a/skl2onnx/shape_calculators/zip_map.py b/skl2onnx/shape_calculators/zip_map.py index eba15d2e3..7babb0a04 100644 --- a/skl2onnx/shape_calculators/zip_map.py +++ b/skl2onnx/shape_calculators/zip_map.py @@ -12,4 +12,12 @@ def 
calculate_sklearn_zipmap(operator): check_input_and_output_numbers(operator, output_count_range=2) +def calculate_sklearn_zipmap_columns(operator): + N = operator.inputs[0].type.shape[0] + for i in range(1, len(operator.outputs)): + operator.outputs[i].type.shape = [N] + + register_shape_calculator('SklearnZipMap', calculate_sklearn_zipmap) +register_shape_calculator( + 'SklearnZipMapColumns', calculate_sklearn_zipmap_columns) diff --git a/tests/test_sklearn_glm_classifier_converter.py b/tests/test_sklearn_glm_classifier_converter.py index 13a41a786..c313f7c0d 100644 --- a/tests/test_sklearn_glm_classifier_converter.py +++ b/tests/test_sklearn_glm_classifier_converter.py @@ -1,6 +1,8 @@ from distutils.version import StrictVersion import unittest import numpy as np +from numpy.testing import assert_almost_equal +import onnx import sklearn from sklearn import linear_model from sklearn.svm import LinearSVC @@ -17,6 +19,7 @@ dump_data_and_model, fit_classification_model, fit_multilabel_classification_model, + TARGET_OPSET ) @@ -814,6 +817,54 @@ def test_model_ridge_classifier_cv_multilabel(self): "onnxruntime.__version__)<= StrictVersion('0.2.1')", ) + @unittest.skipIf(not onnx_built_with_ml(), + reason="Requires ONNX-ML extension.") + @unittest.skipIf(StrictVersion(onnx.__version__) < StrictVersion('1.6'), + reason="Requires onnx 1.6") + def test_model_classifier_multi_zipmap_columns(self): + model, X = fit_classification_model( + linear_model.LogisticRegression(), 3, + n_features=4, label_string=True) + model_onnx = convert_sklearn( + model, + "multi-class ridge classifier", + [("input", FloatTensorType([None, X.shape[1]]))], + options={linear_model.LogisticRegression: {'zipmap': 'columns'}}, + target_opset=TARGET_OPSET) + self.assertIsNotNone(model_onnx) + sess = InferenceSession(model_onnx.SerializeToString()) + names = [_.name for _ in sess.get_outputs()] + self.assertEqual(['output_label', 'scl0', 'scl1', 'scl2'], names) + xt = X[:10].astype(np.float32) + got = sess.run(None, {'input': xt}) + prob = model.predict_proba(xt) + for i in range(prob.shape[1]): + assert_almost_equal(prob[:, i], got[i+1]) + + @unittest.skipIf(not onnx_built_with_ml(), + reason="Requires ONNX-ML extension.") + @unittest.skipIf(StrictVersion(onnx.__version__) < StrictVersion('1.6'), + reason="Requires onnx 1.6") + def test_model_classifier_multi_class_string_zipmap_columns(self): + model, X = fit_classification_model( + linear_model.LogisticRegression(), 3, + n_features=4, label_string=False) + model_onnx = convert_sklearn( + model, + "multi-class ridge classifier", + [("input", FloatTensorType([None, X.shape[1]]))], + options={linear_model.LogisticRegression: {'zipmap': 'columns'}}, + target_opset=TARGET_OPSET) + self.assertIsNotNone(model_onnx) + sess = InferenceSession(model_onnx.SerializeToString()) + names = [_.name for _ in sess.get_outputs()] + self.assertEqual(['output_label', 'i0', 'i1', 'i2'], names) + xt = X[:10].astype(np.float32) + got = sess.run(None, {'input': xt}) + prob = model.predict_proba(xt) + for i in range(prob.shape[1]): + assert_almost_equal(prob[:, i], got[i+1]) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_utils/tests_helper.py b/tests/test_utils/tests_helper.py index 19ee57d2f..b9530ef62 100644 --- a/tests/test_utils/tests_helper.py +++ b/tests/test_utils/tests_helper.py @@ -54,10 +54,10 @@ def fit_classification_model(model, n_classes, is_int=False, pos_features=False, label_string=False, random_state=42, is_bool=False, n_features=20): - X, y = 
make_classification(n_classes=n_classes, n_features=n_features, - n_samples=500, - random_state=random_state, - n_informative=7) + X, y = make_classification( + n_classes=n_classes, n_features=n_features, n_samples=500, + random_state=random_state, n_informative=min(7, n_features), + n_redundant=min(2, n_features - min(7, n_features))) if label_string: y = numpy.array(['cl%d' % cl for cl in y]) X = X.astype(numpy.int64) if is_int or is_bool else X.astype(numpy.float32) diff --git a/tests_onnxmltools/test_lightgbm.py b/tests_onnxmltools/test_lightgbm.py index 40f23611a..3436f5281 100644 --- a/tests_onnxmltools/test_lightgbm.py +++ b/tests_onnxmltools/test_lightgbm.py @@ -36,7 +36,7 @@ def setUpClass(self): LGBMClassifier, 'LightGbmLGBMClassifier', calculate_linear_classifier_output_shapes, convert_lightgbm, options={ - 'zipmap': [True, False], 'nocl': [True, False]}) + 'zipmap': [True, False, 'columns'], 'nocl': [True, False]}) update_registered_converter( LGBMRegressor, 'LgbmRegressor', diff --git a/tests_onnxmltools/test_xgboost_converters.py b/tests_onnxmltools/test_xgboost_converters.py index ac4f829e3..ed1bce491 100644 --- a/tests_onnxmltools/test_xgboost_converters.py +++ b/tests_onnxmltools/test_xgboost_converters.py @@ -51,12 +51,14 @@ def custom_parser(scope, model, inputs, custom_parsers=None): XGBClassifier, 'XGBClassifier', calculate_linear_classifier_output_shapes, convert_xgboost, parser=custom_parser, - options={'zipmap': [True, False], 'nocl': [True, False]}) + options={'zipmap': [True, False, 'columns'], + 'nocl': [True, False]}) update_registered_converter( XGBRegressor, 'XGBRegressor', calculate_linear_regressor_output_shapes, convert_xgboost, - options={'zipmap': [True, False], 'nocl': [True, False]}) + options={'zipmap': [True, False, 'columns'], + 'nocl': [True, False]}) def test_xgb_regressor(self): iris = load_iris() From b333eebaba14649ff71044f3d52d90da2e04bbdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Tue, 24 Nov 2020 19:50:12 +0100 Subject: [PATCH 12/17] Fix converter for CalibratedClassifier 0.24 (#556) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré Co-authored-by: xavier dupré Signed-off-by: xavier dupré --- skl2onnx/operator_converters/calibrated_classifier_cv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skl2onnx/operator_converters/calibrated_classifier_cv.py b/skl2onnx/operator_converters/calibrated_classifier_cv.py index 492f37370..86690f7b1 100644 --- a/skl2onnx/operator_converters/calibrated_classifier_cv.py +++ b/skl2onnx/operator_converters/calibrated_classifier_cv.py @@ -255,7 +255,8 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container, base_model = model.base_estimator op_type = sklearn_operator_name_map[type(base_model)] - n_classes = len(model.classes_) + n_classes = (len(model.classes_) if hasattr(model, 'classes_') else + len(base_model.classes_)) prob_name = [None] * n_classes this_operator = scope.declare_local_operator(op_type) From bed1f849a06e574fac33575f19e8162f81582c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 25 Nov 2020 13:57:56 +0100 Subject: [PATCH 13/17] Add onnxruntime 1.5.1 to CI (#542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add onnxruntime 1.5.1 to CI Signed-off-by: xavier dupré --- .azure-pipelines/linux-conda-CI.yml | 12 +++++++++++- .azure-pipelines/win32-conda-CI.yml | 10 ++++++++++ 
tests/test_sklearn_nearest_neighbour_converter.py | 3 ++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index 7fdca64c5..1ba430d0f 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -13,6 +13,16 @@ jobs: vmImage: 'Ubuntu-16.04' strategy: matrix: + Py37-Onnx170-Rt151-Skl0232: + do.bench: '0' + python.version: '3.7' + numpy.version: '>=1.18.1' + onnx.version: '==1.7.0' + onnx.target_opset: '' + onnxrt.version: 'onnxruntime==1.5.1' # '-i https://test.pypi.org/simple/ ort-nightly' + sklearn.version: '==0.23.2' + onnxcc.version: 'git' + run.example: '1' Py37-Onnx170-Rt140-Skl0232: do.bench: '0' python.version: '3.7' @@ -22,7 +32,7 @@ jobs: onnxrt.version: 'onnxruntime==1.4.0' # '-i https://test.pypi.org/simple/ ort-nightly' sklearn.version: '==0.23.2' onnxcc.version: 'git' - run.example: '1' + run.example: '0' Py37-Onnx170-Rt130-Skl0231: do.bench: '0' python.version: '3.7' diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index 1078d86a0..53cac4be3 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -13,6 +13,16 @@ jobs: vmImage: 'vs2017-win2016' strategy: matrix: + Py37-Onnx170-Rt151-Skl0232: + python.version: '3.7' + # onnx.version: '-i https://test.pypi.org/simple/ onnx' + onnx.version: 'onnx==1.7.0' + onnx.target_opset: '' + numpy.version: 'numpy>=1.18.1' + scipy.version: 'scipy' + onnxrt.version: 'onnxruntime==1.5.1' # -i https://test.pypi.org/simple/ ort-nightly' + onnxcc.version: 'git+https://github.com/microsoft/onnxconverter-common.git' + sklearn.version: '==0.23.2' Py37-Onnx170-Rt140-Skl0232: python.version: '3.7' # onnx.version: '-i https://test.pypi.org/simple/ onnx' diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index 8eb2983fa..a564e1f7b 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -283,7 +283,8 @@ def test_model_knn_regressor2_1_radius(self): None, {'input': X.astype(numpy.float32)}) rows.append('--{}--'.format(out)) rows.append(str(res)) - if onnxruntime.__version__.startswith('1.4.'): + if (onnxruntime.__version__.startswith('1.4.') or + onnxruntime.__version__.startswith('1.5.')): # TODO: investigate the regression in onnxruntime 1.4 # One broadcasted multiplication unexpectedly produces nan. 
whole = '\n'.join(rows) From c38cbb107c31f52c58d361c46c4b611f56f55fbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 25 Nov 2020 15:45:33 +0100 Subject: [PATCH 14/17] Add missing import in one documentation page (#559) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add import in one documentation page Signed-off-by: xavier dupré --- docs/pipeline.rst | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/pipeline.rst b/docs/pipeline.rst index 78b7ee917..f9f7962ac 100644 --- a/docs/pipeline.rst +++ b/docs/pipeline.rst @@ -23,6 +23,13 @@ useful to build complex pipelines such as the following one: :: + from sklearn.linear_model import LogisticRegression + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import StandardScaler, OneHotEncoder + from sklearn.impute import SimpleImputer + from sklearn.decomposition import TruncatedSVD + from sklearn.compose import ColumnTransformer + numeric_features = [0, 1, 2] # ["vA", "vB", "vC"] categorical_features = [3, 4] # ["vcat", "vcat2"] @@ -73,10 +80,18 @@ Once fitted, the model is converted into *ONNX*: :: + from skl2onnx import convert_sklearn + from skl2onnx.common.data_types import FloatTensorType, StringTensorType + initial_type = [('numfeat', FloatTensorType([None, 3])), ('strfeat', StringTensorType([None, 2]))] model_onnx = convert_sklearn(model, initial_types=initial_type) +.. note:: + The error ``AttributeError: 'ColumnTransformer' object has no attribute 'transformers_'`` + means the model was not trained. The converter tries to access an attribute + created by method `fit`. + It can be represented as a `DOT `_ graph: @@ -225,6 +240,7 @@ a pipeline and each of its components independently. from sklearn.preprocessing import StandardScaler import onnxruntime from skl2onnx.helpers import collect_intermediate_steps, compare_objects + from skl2onnx.common.data_types import FloatTensorType # Let's fit a model. data = numpy.array([[0, 0], [0, 0], [2, 1], [2, 1]], @@ -282,7 +298,11 @@ them. 
from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler import onnxruntime - from skl2onnx.helpers import collect_intermediate_steps, compare_objects + from skl2onnx.common.data_types import guess_data_type + from skl2onnx.common.exceptions import MissingShapeCalculator + from skl2onnx.helpers import collect_intermediate_steps, compare_objects, enumerate_pipeline_models + from skl2onnx.helpers.investigate import _alter_model_for_debugging + from skl2onnx import convert_sklearn class MyScaler(StandardScaler): pass From 44c89993a8785b4067f3e946080e9bc7067149ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Thu, 26 Nov 2020 12:25:41 +0100 Subject: [PATCH 15/17] Support double for linear models, VotingRegressor (#561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Support double for linear models, VotingRegressor * disable unit test failing with onnxruntime 1.5 Signed-off-by: xavier dupré --- .../operator_converters/linear_regressor.py | 24 ++- .../operator_converters/voting_regressor.py | 21 ++- tests/test_sklearn_concat.py | 4 +- tests/test_sklearn_double_tensor_type_reg.py | 144 ++++++++++++++++++ tests/test_sklearn_gaussian_process.py | 8 +- tests/test_sklearn_pipeline.py | 21 ++- ...test_sklearn_voting_regressor_converter.py | 9 +- 7 files changed, 207 insertions(+), 24 deletions(-) create mode 100644 tests/test_sklearn_double_tensor_type_reg.py diff --git a/skl2onnx/operator_converters/linear_regressor.py b/skl2onnx/operator_converters/linear_regressor.py index d200ccfb3..c81cb3cd4 100644 --- a/skl2onnx/operator_converters/linear_regressor.py +++ b/skl2onnx/operator_converters/linear_regressor.py @@ -8,15 +8,35 @@ except ImportError: import collections as cabc import numpy as np -from ..common._apply_operation import apply_cast +from ..common._apply_operation import apply_cast, apply_add from ..common.data_types import ( - BooleanTensorType, Int64TensorType, guess_numpy_type) + BooleanTensorType, Int64TensorType, DoubleTensorType, + guess_numpy_type, guess_proto_type) from ..common._registration import register_converter from ..proto import onnx_proto def convert_sklearn_linear_regressor(scope, operator, container): op = operator.raw_operator + + if type(operator.inputs[0].type) in (DoubleTensorType, ): + proto_dtype = guess_proto_type(operator.inputs[0].type) + coef = scope.get_unique_variable_name('coef') + model_coef = op.coef_.T + container.add_initializer( + coef, proto_dtype, model_coef.shape, model_coef.ravel().tolist()) + intercept = scope.get_unique_variable_name('intercept') + container.add_initializer( + intercept, proto_dtype, op.intercept_.shape, + op.intercept_.ravel().tolist()) + multiplied = scope.get_unique_variable_name('multiplied') + container.add_node( + 'MatMul', [operator.inputs[0].full_name, coef], multiplied, + name=scope.get_unique_operator_name('MatMul')) + apply_add(scope, [multiplied, intercept], + operator.outputs[0].full_name, container) + return + op_type = 'LinearRegressor' dtype = guess_numpy_type(operator.inputs[0].type) if dtype not in (np.float32, np.float64): diff --git a/skl2onnx/operator_converters/voting_regressor.py b/skl2onnx/operator_converters/voting_regressor.py index 36d24ab67..8024d91ed 100644 --- a/skl2onnx/operator_converters/voting_regressor.py +++ b/skl2onnx/operator_converters/voting_regressor.py @@ -6,8 +6,9 @@ from ..common._registration import register_converter from ..common._apply_operation import apply_mul +from ..common.data_types 
import ( + guess_proto_type, FloatTensorType, DoubleTensorType) from .._supported_operators import sklearn_operator_name_map -from ..proto import onnx_proto def convert_voting_regressor(scope, operator, container): @@ -16,6 +17,17 @@ def convert_voting_regressor(scope, operator, container): """ op = operator.raw_operator + if not isinstance(operator.inputs[0].type, + (FloatTensorType, DoubleTensorType)): + this_operator = scope.declare_local_operator('SklearnCast') + this_operator.raw_operator = None + this_operator.inputs = operator.inputs + var_name = scope.declare_local_variable('cast', FloatTensorType()) + this_operator.outputs.append(var_name) + inputs = this_operator.outputs + else: + inputs = operator.inputs + vars_names = [] for i, estimator in enumerate(op.estimators_): if estimator is None: @@ -25,10 +37,10 @@ def convert_voting_regressor(scope, operator, container): this_operator = scope.declare_local_operator(op_type) this_operator.raw_operator = estimator - this_operator.inputs = operator.inputs + this_operator.inputs = inputs var_name = scope.declare_local_variable( - 'var_%d' % i, operator.inputs[0].type.__class__()) + 'var_%d' % i, inputs[0].type.__class__()) this_operator.outputs.append(var_name) var_name = var_name.onnx_name @@ -38,8 +50,9 @@ def convert_voting_regressor(scope, operator, container): val = 1. / len(op.estimators_) weights_name = scope.get_unique_variable_name('w%d' % i) + proto_dtype = guess_proto_type(inputs[0].type) container.add_initializer( - weights_name, onnx_proto.TensorProto.FLOAT, [1], [val]) + weights_name, proto_dtype, [1], [val]) wvar_name = scope.get_unique_variable_name('wvar_%d' % i) apply_mul(scope, [var_name, weights_name], wvar_name, container, broadcast=1) diff --git a/tests/test_sklearn_concat.py b/tests/test_sklearn_concat.py index 6a67f87f4..2ec76ad3e 100644 --- a/tests/test_sklearn_concat.py +++ b/tests/test_sklearn_concat.py @@ -125,7 +125,9 @@ def test_concat(self): diff = np.sort( np.abs(np.squeeze(pred_skl) - np.squeeze(pred_onx))) - self.assertEqual(diff[0], diff[-1]) + if diff[0] != diff[-1]: + raise AssertionError( + "Discrepencies\nSKL\n{}\nORT\n{}".format(pred_skl, pred_onx)) if __name__ == "__main__": diff --git a/tests/test_sklearn_double_tensor_type_reg.py b/tests/test_sklearn_double_tensor_type_reg.py new file mode 100644 index 000000000..2195bfff2 --- /dev/null +++ b/tests/test_sklearn_double_tensor_type_reg.py @@ -0,0 +1,144 @@ +"""Tests GLMRegressor converter.""" + +import unittest +from distutils.version import StrictVersion +import numpy as np +from sklearn.ensemble import BaggingRegressor +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.linear_model import LinearRegression, SGDRegressor +from sklearn.neighbors import KNeighborsRegressor +from sklearn.neural_network import MLPRegressor +try: + from sklearn.ensemble import VotingRegressor +except ImportError: + # New in 0.21 + VotingRegressor = None +from skl2onnx import convert_sklearn, to_onnx +from skl2onnx.common.data_types import DoubleTensorType +from onnxruntime import __version__ as ort_version +from test_utils import ( + dump_data_and_model, fit_regression_model) # , TARGET_OPSET) + +TARGET_OPSET = 12 # change when PR 551 + + +class TestSklearnDoubleTensorTypeRegressor(unittest.TestCase): + @unittest.skipIf( + StrictVersion(ort_version) <= StrictVersion("1.2.0"), + reason="onnxruntime misses implementation for double") + def test_model_linear_regression_64(self): + model, X = fit_regression_model(LinearRegression()) + model_onnx = 
convert_sklearn( + model, "linear regression", + [("input", DoubleTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET) + self.assertIn("elem_type: 11", str(model_onnx)) + dump_data_and_model( + X.astype(np.float64), model, model_onnx, + basename="SklearnLinearRegressionDouble") + + @unittest.skipIf( + StrictVersion(ort_version) < StrictVersion("1.6.0"), + reason="onnxruntime misses implementation for " + "Relu, Tanh, Sigmoid for double") + def test_model_mlpregressor_64(self): + # Could not find an implementation for the node Relu:Relu(6) + # Could not find an implementation for the node Tanh:Tanh(6) + # Could not find an implementation for the node Sigmoid:Sigmoid(6) + for activation in ['relu', 'tanh', 'logistic']: + with self.subTest(activation=activation): + model, X = fit_regression_model( + MLPRegressor(activation=activation)) + model_onnx = convert_sklearn( + model, "linear regression", + [("input", DoubleTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET) + self.assertIn("elem_type: 11", str(model_onnx)) + dump_data_and_model( + X.astype(np.float64), model, model_onnx, + basename="SklearnMLPRegressorDouble%s" % activation) + + @unittest.skipIf( + StrictVersion(ort_version) < StrictVersion("1.6.0"), + reason="onnxruntime misses implementation for " + "ReduceMean for double") + def test_bagging_regressor_sgd_64(self): + # Could not find an implementation for + # the node ReduceMean:ReduceMean(11) + model, X = fit_regression_model( + BaggingRegressor(SGDRegressor())) + model_onnx = convert_sklearn( + model, "bagging regressor", + [("input", DoubleTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET) + dump_data_and_model( + X.astype(np.float64), model, model_onnx, + basename="SklearnBaggingRegressorSGDDouble") + + @unittest.skipIf( + StrictVersion(ort_version) <= StrictVersion("1.2.0"), + reason="onnxruntime misses implementation for double") + def test_model_sgd_regressor_64(self): + model, X = fit_regression_model(SGDRegressor()) + model_onnx = convert_sklearn( + model, "linear regression", + [("input", DoubleTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET) + self.assertIn("elem_type: 11", str(model_onnx)) + dump_data_and_model( + X.astype(np.float64), model, model_onnx, + basename="SklearnLinearSGDRegressorDouble") + + @unittest.skipIf( + StrictVersion(ort_version) < StrictVersion("1.6.0"), + reason="shape_inference fails") + def test_gpr_rbf_fitted_true_double(self): + gp = GaussianProcessRegressor( + alpha=1e-7, n_restarts_optimizer=15, normalize_y=True) + gp, X = fit_regression_model(gp) + model_onnx = to_onnx( + gp, initial_types=[('X', DoubleTensorType([None, None]))], + target_opset=TARGET_OPSET) + dump_data_and_model( + X.astype(np.float64), gp, model_onnx, verbose=False, + basename="SklearnGaussianProcessRBFTDouble") + + @unittest.skipIf( + StrictVersion(ort_version) < StrictVersion("1.6.0"), + reason="onnxruntime misses implementation for " + "TopK for double") + def test_model_knn_regressor_double(self): + # Could not find an implementation for the node To_TopK:TopK(11) + model, X = fit_regression_model(KNeighborsRegressor(n_neighbors=2)) + model_onnx = convert_sklearn( + model, "KNN regressor", [("input", DoubleTensorType([None, 4]))], + target_opset=TARGET_OPSET, + options={id(model): {'optim': 'cdist'}}) + dump_data_and_model( + X.astype(np.float64)[:7], + model, model_onnx, + basename="SklearnKNeighborsRegressorDouble") + + @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") + @unittest.skipIf( + 
StrictVersion(ort_version) < StrictVersion("1.6.0"), + reason="onnxruntime misses implementation for " + "Sum for double") + def test_model_voting_regression(self): + # Could not find an implementation for the node Sum:Sum(8) + model = VotingRegressor([ + ('lr', LinearRegression()), + ('dt', SGDRegressor())]) + model, X = fit_regression_model(model) + model_onnx = convert_sklearn( + model, "voting regression", + [("input", DoubleTensorType([None, X.shape[1]]))], + target_opset=TARGET_OPSET) + dump_data_and_model( + X.astype(np.float64), model, model_onnx, + basename="SklearnVotingRegressorDouble", + comparable_outputs=[0]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_sklearn_gaussian_process.py b/tests/test_sklearn_gaussian_process.py index bafa3bea8..8c09f3fd9 100644 --- a/tests/test_sklearn_gaussian_process.py +++ b/tests/test_sklearn_gaussian_process.py @@ -482,7 +482,9 @@ def test_gpr_rbf_unfitted(self): predict_attributes=options[ GaussianProcessRegressor]) - @unittest.skipIf(True, reason="shape_inference fails") + @unittest.skipIf( + StrictVersion(ort_version) < StrictVersion("1.6.0"), + reason="shape_inference fails") @unittest.skipIf( StrictVersion(ort_version) <= StrictVersion(THRESHOLD), reason="onnxruntime %s" % THRESHOLD) @@ -498,9 +500,9 @@ def test_gpr_rbf_fitted_true(self): gp, initial_types=[('X', DoubleTensorType([None, None]))], target_opset=TARGET_OPSET) self.assertTrue(model_onnx is not None) - dump_data_and_model(X, gp, model_onnx, + dump_data_and_model(X.astype(np.float64), gp, model_onnx, verbose=False, - basename="SklearnGaussianProcessRBFT") + basename="SklearnGaussianProcessRBFTDouble") @unittest.skipIf( StrictVersion(ort_version) <= StrictVersion(THRESHOLD), diff --git a/tests/test_sklearn_pipeline.py b/tests/test_sklearn_pipeline.py index 652493cc3..3d50cafe1 100644 --- a/tests/test_sklearn_pipeline.py +++ b/tests/test_sklearn_pipeline.py @@ -1,6 +1,8 @@ import unittest +import urllib.error as url_error from distutils.version import StrictVersion from io import StringIO +import warnings import numpy from numpy.testing import assert_almost_equal import pandas @@ -268,21 +270,24 @@ def test_pipeline_column_transformer(self): @unittest.skipIf( ColumnTransformer is None, - reason="ColumnTransformer not available in 0.19", - ) + reason="ColumnTransformer not available in 0.19") @unittest.skipIf(not onnx_built_with_ml(), reason="Requires ONNX-ML extension.") @unittest.skipIf( not check_scikit_version(), - reason="Scikit 0.20 causes some mismatches", - ) + reason="Scikit 0.20 causes some mismatches") def test_pipeline_column_transformer_titanic(self): # fit - titanic_url = ( - "https://raw.githubusercontent.com/amueller/" - "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv") - data = pandas.read_csv(titanic_url) + try: + titanic_url = ( + "https://raw.githubusercontent.com/amueller/" + "scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv") + data = pandas.read_csv(titanic_url) + except url_error.URLError: + # Do not fail the test if the data cannot be fetched. 
+ warnings.warn("Unable to fetch titanic data.") + return X = data.drop("survived", axis=1) y = data["survived"] diff --git a/tests/test_sklearn_voting_regressor_converter.py b/tests/test_sklearn_voting_regressor_converter.py index 1ba46cb95..268cda5c2 100644 --- a/tests/test_sklearn_voting_regressor_converter.py +++ b/tests/test_sklearn_voting_regressor_converter.py @@ -28,8 +28,7 @@ def model_to_test(): class TestVotingRegressorConverter(unittest.TestCase): - @unittest.skipIf(VotingRegressor is None, - reason="new in 0.21") + @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") def test_model_voting_regression(self): model, X = fit_regression_model(model_to_test()) model_onnx = convert_sklearn( @@ -48,8 +47,7 @@ def test_model_voting_regression(self): comparable_outputs=[0] ) - @unittest.skipIf(VotingRegressor is None, - reason="new in 0.21") + @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") def test_model_voting_regression_int(self): model, X = fit_regression_model(model_to_test(), is_int=True) model_onnx = convert_sklearn( @@ -68,8 +66,7 @@ def test_model_voting_regression_int(self): comparable_outputs=[0] ) - @unittest.skipIf(VotingRegressor is None, - reason="new in 0.21") + @unittest.skipIf(VotingRegressor is None, reason="new in 0.21") def test_model_voting_regression_bool(self): model, X = fit_regression_model(model_to_test(), is_bool=True) model_onnx = convert_sklearn( From ee0b538f9bdd883406ca2b9a8db91fbc478f43fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Mon, 7 Dec 2020 18:37:27 +0100 Subject: [PATCH 16/17] Fix test with older opset, fix the nightly build. (#567) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix test with older opset * add nose * rename a function Signed-off-by: xavier dupré --- requirements-dev.txt | 1 + tests/test_sklearn_double_tensor_type_reg.py | 16 ++++- ...est_sklearn_nearest_neighbour_converter.py | 65 +++++-------------- tests/test_utils/__init__.py | 9 +-- 4 files changed, 35 insertions(+), 56 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 56dc9a31a..815fbee82 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ +nose numpy protobuf codecov diff --git a/tests/test_sklearn_double_tensor_type_reg.py b/tests/test_sklearn_double_tensor_type_reg.py index 2195bfff2..3d2639ef3 100644 --- a/tests/test_sklearn_double_tensor_type_reg.py +++ b/tests/test_sklearn_double_tensor_type_reg.py @@ -3,6 +3,11 @@ import unittest from distutils.version import StrictVersion import numpy as np +from sklearn.exceptions import ConvergenceWarning +try: + from sklearn.utils._testing import ignore_warnings +except ImportError: + from sklearn.utils.testing import ignore_warnings from sklearn.ensemble import BaggingRegressor from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.linear_model import LinearRegression, SGDRegressor @@ -17,15 +22,16 @@ from skl2onnx.common.data_types import DoubleTensorType from onnxruntime import __version__ as ort_version from test_utils import ( - dump_data_and_model, fit_regression_model) # , TARGET_OPSET) + dump_data_and_model, fit_regression_model, TARGET_OPSET) -TARGET_OPSET = 12 # change when PR 551 +warnings_to_skip = (DeprecationWarning, FutureWarning, ConvergenceWarning) class TestSklearnDoubleTensorTypeRegressor(unittest.TestCase): @unittest.skipIf( StrictVersion(ort_version) <= StrictVersion("1.2.0"), reason="onnxruntime misses implementation for double") + 
@ignore_warnings(category=warnings_to_skip) def test_model_linear_regression_64(self): model, X = fit_regression_model(LinearRegression()) model_onnx = convert_sklearn( @@ -41,6 +47,7 @@ def test_model_linear_regression_64(self): StrictVersion(ort_version) < StrictVersion("1.6.0"), reason="onnxruntime misses implementation for " "Relu, Tanh, Sigmoid for double") + @ignore_warnings(category=warnings_to_skip) def test_model_mlpregressor_64(self): # Could not find an implementation for the node Relu:Relu(6) # Could not find an implementation for the node Tanh:Tanh(6) @@ -62,6 +69,7 @@ def test_model_mlpregressor_64(self): StrictVersion(ort_version) < StrictVersion("1.6.0"), reason="onnxruntime misses implementation for " "ReduceMean for double") + @ignore_warnings(category=warnings_to_skip) def test_bagging_regressor_sgd_64(self): # Could not find an implementation for # the node ReduceMean:ReduceMean(11) @@ -78,6 +86,7 @@ def test_bagging_regressor_sgd_64(self): @unittest.skipIf( StrictVersion(ort_version) <= StrictVersion("1.2.0"), reason="onnxruntime misses implementation for double") + @ignore_warnings(category=warnings_to_skip) def test_model_sgd_regressor_64(self): model, X = fit_regression_model(SGDRegressor()) model_onnx = convert_sklearn( @@ -92,6 +101,7 @@ def test_model_sgd_regressor_64(self): @unittest.skipIf( StrictVersion(ort_version) < StrictVersion("1.6.0"), reason="shape_inference fails") + @ignore_warnings(category=warnings_to_skip) def test_gpr_rbf_fitted_true_double(self): gp = GaussianProcessRegressor( alpha=1e-7, n_restarts_optimizer=15, normalize_y=True) @@ -107,6 +117,7 @@ def test_gpr_rbf_fitted_true_double(self): StrictVersion(ort_version) < StrictVersion("1.6.0"), reason="onnxruntime misses implementation for " "TopK for double") + @ignore_warnings(category=warnings_to_skip) def test_model_knn_regressor_double(self): # Could not find an implementation for the node To_TopK:TopK(11) model, X = fit_regression_model(KNeighborsRegressor(n_neighbors=2)) @@ -124,6 +135,7 @@ def test_model_knn_regressor_double(self): StrictVersion(ort_version) < StrictVersion("1.6.0"), reason="onnxruntime misses implementation for " "Sum for double") + @ignore_warnings(category=warnings_to_skip) def test_model_voting_regression(self): # Could not find an implementation for the node Sum:Sum(8) model = VotingRegressor([ diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py index a564e1f7b..ec606ae66 100644 --- a/tests/test_sklearn_nearest_neighbour_converter.py +++ b/tests/test_sklearn_nearest_neighbour_converter.py @@ -54,6 +54,12 @@ TARGET_OPSET) +def dont_test_radius(): + return ( + StrictVersion(onnxruntime.__version__) <= StrictVersion("1.3.0") or + StrictVersion(onnx.__version__) <= StrictVersion("1.6.0")) + + class TestNearestNeighbourConverter(unittest.TestCase): @functools.lru_cache(maxsize=1) @@ -118,12 +124,7 @@ def test_model_knn_regressor(self): model, model_onnx, basename="SklearnKNeighborsRegressor-Dec4") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") - @unittest.skipIf( - StrictVersion(onnx.__version__) <= StrictVersion("1.6.0"), - reason="not available") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_regressor_radius(self): model, X = self._fit_model(RadiusNeighborsRegressor()) model_onnx = convert_sklearn(model, "KNN regressor", @@ -179,12 +180,7 @@ def test_model_knn_regressor_double(self): model, model_onnx, 
basename="SklearnKNeighborsRegressor64") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") - @unittest.skipIf( - StrictVersion(onnx.__version__) < StrictVersion("1.6.0"), - reason="not available") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_regressor_double_radius(self): model, X = self._fit_model(RadiusNeighborsRegressor()) model_onnx = convert_sklearn( @@ -217,9 +213,7 @@ def test_model_knn_regressor_yint(self): model, model_onnx, basename="SklearnKNeighborsRegressorYInt") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_regressor_yint_radius(self): model, X = self._fit_model( RadiusNeighborsRegressor(), label_int=True) @@ -247,12 +241,7 @@ def test_model_knn_regressor2_1(self): model, model_onnx, basename="SklearnKNeighborsRegressor2") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") - @unittest.skipIf( - StrictVersion(onnx.__version__) <= StrictVersion("1.6.0"), - reason="fails for earlier version of onnx (NaN)") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_regressor2_1_radius(self): model, X = self._fit_model_simple( RadiusNeighborsRegressor(algorithm="brute"), @@ -373,11 +362,7 @@ def test_model_knn_regressor_weights_distance_11(self): model, model_onnx, basename="SklearnKNeighborsRegressorWDist%d-Dec3" % op) - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") - @unittest.skipIf(TARGET_OPSET < 11, - reason="needs higher target_opset") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_regressor_weights_distance_11_radius(self): model, X = self._fit_model_simple( RadiusNeighborsRegressor( @@ -432,11 +417,7 @@ def test_model_knn_classifier_binary_class(self): model, model_onnx, basename="SklearnKNeighborsClassifierBinary") - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") + @unittest.skipIf(dont_test_radius(), reason="not available") @unittest.skipIf(onnx_opset_version() < 12, reason="needs higher target_opset") def test_model_knn_classifier_binary_class_radius(self): @@ -472,11 +453,7 @@ def test_model_knn_classifier_multi_class(self): model, model_onnx, basename="SklearnKNeighborsClassifierMulti") - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") + @unittest.skipIf(dont_test_radius(), reason="not available") @unittest.skipIf(onnx_opset_version() < 12, reason="needs higher target_opset") def test_model_knn_classifier_multi_class_radius(self): @@ -883,13 +860,7 @@ def test_model_knn_iris_regressor_multi_reg(self): model, onx, basename="SklearnKNeighborsRegressorMReg") - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") - @unittest.skipIf(onnx_opset_version() < 11, - reason="needs higher target_opset") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_iris_regressor_multi_reg_radius(self): 
iris = datasets.load_iris() X = iris.data.astype(numpy.float32) @@ -934,13 +905,7 @@ def test_model_knn_iris_classifier_multi_reg2_weight(self): model, onx, basename="SklearnKNeighborsClassifierMReg2-Out0") - @unittest.skipIf(not onnx_built_with_ml(), - reason="Requires ONNX-ML extension.") - @unittest.skipIf( - StrictVersion(onnxruntime.__version__) < StrictVersion("1.2.0"), - reason="not available") - @unittest.skipIf(onnx_opset_version() < 11, - reason="needs higher target_opset") + @unittest.skipIf(dont_test_radius(), reason="not available") def test_model_knn_iris_classifier_multi_reg2_weight_radius(self): iris = datasets.load_iris() X = iris.data.astype(numpy.float32) diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index d6800e519..47c302359 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -47,7 +47,8 @@ def _get_ir_version(opv): return 3 -TARGET_OPSET = int(os.environ.get('TEST_TARGET_OPSET', - onnx.defs.onnx_opset_version())) -TARGET_IR = int(os.environ.get('TEST_TARGET_IR', - _get_ir_version(TARGET_OPSET))) +TARGET_OPSET = int( + os.environ.get('TEST_TARGET_OPSET', + min(12, onnx.defs.onnx_opset_version()))) +TARGET_IR = int( + os.environ.get('TEST_TARGET_IR', _get_ir_version(TARGET_OPSET))) From 592b893ef6050a69fdeaaab363ed985a18241b6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Wed, 9 Dec 2020 19:11:45 +0100 Subject: [PATCH 17/17] fix nightly build (#570) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré Co-authored-by: xavier dupré Signed-off-by: xavier dupré --- .azure-pipelines/win32-CI-nightly.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/win32-CI-nightly.yml b/.azure-pipelines/win32-CI-nightly.yml index 46d242b7d..a0d413414 100644 --- a/.azure-pipelines/win32-CI-nightly.yml +++ b/.azure-pipelines/win32-CI-nightly.yml @@ -81,6 +81,7 @@ jobs: displayName: 'install onnxruntime' - script: | + call activate py$(python.version) pip install scikit-learn$(sklearn.version) displayName: 'install scikit-learn'
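
The patches above introduce the ``zipmap='columns'`` option (PATCH 11/17), which removes the final ZipMap operator and exposes one flat probability output per class. A minimal usage sketch, distilled from docs/examples/plot_convert_zipmap.py and the new unit tests, assuming a skl2onnx build that includes this series together with scikit-learn and onnxruntime; the dataset, variable names and target_opset below are only illustrative:

    # Sketch only: convert a classifier with zipmap='columns' so the ONNX model
    # produces one probability vector per class instead of a sequence of maps.
    import numpy
    import onnxruntime as rt
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, _ = train_test_split(X, y, random_state=0)
    clr = LogisticRegression(max_iter=500).fit(X_train, y_train)

    initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
    onx = convert_sklearn(clr, initial_types=initial_type,
                          options={id(clr): {'zipmap': 'columns'}},
                          target_opset=12)

    sess = rt.InferenceSession(onx.SerializeToString())
    res = sess.run(None, {'float_input': X_test.astype(numpy.float32)})
    # First output is the predicted label, then one 1D score vector per class
    # (named i0, i1, i2 for integer labels, s<label> for string labels).
    for out, value in zip(sess.get_outputs(), res):
        print(out.name, value.shape)

With integer class labels this yields the outputs checked by the new test test_model_classifier_multi_class_string_zipmap_columns: 'output_label' followed by 'i0', 'i1', 'i2'.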