Add annotation to converters and shape calculator (#677)
xadupre authored Jul 2, 2021
1 parent 82a1c9e commit 08a9653
Showing 103 changed files with 665 additions and 489 deletions.
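
The converter and shape-calculator files that give this commit its title are not among the excerpts shown below (only the benchmark scripts are). As a rough illustration of the pattern being annotated, here is a minimal sketch of a skl2onnx converter and shape-calculator pair; the function names, docstrings and the commented-out registration call are assumptions for illustration, not the code added by #677.

    # Illustrative sketch only: names and docstrings are assumptions,
    # not the converters touched by this commit.
    from skl2onnx import update_registered_converter
    from skl2onnx.common.data_types import FloatTensorType


    def demo_shape_calculator(operator):
        """Shape calculator: declare one float output with a single column
        and as many rows as the input."""
        n = operator.inputs[0].type.shape[0]
        operator.outputs[0].type = FloatTensorType([n, 1])


    def demo_converter(scope, operator, container):
        """Converter: emit a placeholder Identity node forwarding the input."""
        container.add_node(
            'Identity', operator.inputs[0].full_name,
            operator.outputs[0].full_name,
            name=scope.get_unique_operator_name('Identity'))


    # Registration ties an estimator class to both functions, e.g.:
    # update_registered_converter(SomeEstimator, 'SomeEstimatorAlias',
    #                             demo_shape_calculator, demo_converter)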
14 changes: 6 additions & 8 deletions benchmarks/bench_plot_onnxruntime_decision_tree.py
@@ -5,13 +5,8 @@
Benchmark of onnxruntime on DecisionTree.
"""
# Authors: Xavier Dupré (benchmark)
import matplotlib

from io import BytesIO
from time import perf_counter as time
from itertools import combinations, chain
from itertools import combinations_with_replacement as combinations_w_r

import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
@@ -96,7 +91,8 @@ def bench(n_obs, n_features, max_depths, methods,

fct1, fct2 = fcts[method]

if not allow_configuration(n=n, nfeat=nfeat, max_depth=max_depth):
if not allow_configuration(
n=n, nfeat=nfeat, max_depth=max_depth):
continue

obs = dict(n_obs=n, nfeat=nfeat,
@@ -162,8 +158,10 @@ def plot_results(df, verbose=False):
if row == ax.shape[0] - 1:
a.set_xlabel("N features", fontsize='x-small')
if pos == 0:
a.set_ylabel("Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth),
fontsize='x-small')
a.set_ylabel(
"Time (s) n_obs={}\nmax_depth={}".format(
n_obs, max_depth),
fontsize='x-small')

color = 'b'
subset = df[(df.method == method) & (df.n_obs == n_obs) &
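Each of the benchmark scripts in this commit follows the same scheme: fit a scikit-learn estimator, convert it with convert_sklearn, load the result into an onnxruntime InferenceSession, then compare and time both predict paths on identical batches. A much-reduced sketch of that scheme follows; the dataset, the repetition count and the CPU-only provider are assumptions, not values taken from the files above.

    # Reduced sketch of the scheme shared by these benchmark scripts;
    # dataset size, repetition count and provider are assumptions.
    import warnings
    from time import perf_counter as time

    import numpy as np
    from numpy.testing import assert_almost_equal
    from sklearn.datasets import make_regression
    from sklearn.tree import DecisionTreeRegressor
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType
    from onnxruntime import InferenceSession

    X, y = make_regression(n_samples=1000, n_features=10, random_state=0)
    X = X.astype(np.float32)
    model = DecisionTreeRegressor(max_depth=10).fit(X, y)

    # Convert the fitted model to ONNX and load it into onnxruntime.
    onx = convert_sklearn(
        model, initial_types=[('X', FloatTensorType([None, X.shape[1]]))])
    sess = InferenceSession(onx.SerializeToString(),
                            providers=['CPUExecutionProvider'])

    # Check both paths agree (warn on small float32 discrepancies,
    # as the benchmark scripts do), then time them on the same batch.
    p_skl = model.predict(X)
    p_ort = sess.run(None, {'X': X})[0]
    try:
        assert_almost_equal(p_skl.ravel(), p_ort.ravel(), decimal=5)
    except AssertionError as e:
        warnings.warn(str(e))

    for label, fct in [('skl', lambda x: model.predict(x)),
                       ('ort', lambda x: sess.run(None, {'X': x})[0])]:
        begin = time()
        for _ in range(100):
            fct(X)
        print(label, "%0.4f sec" % (time() - begin))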
24 changes: 11 additions & 13 deletions benchmarks/bench_plot_onnxruntime_hgb.py
@@ -4,21 +4,15 @@
"""
Benchmark of onnxruntime on RandomForestRegressor.
"""
import sys
import warnings
from io import BytesIO
from time import perf_counter as time
from itertools import (
combinations, chain,
combinations_with_replacement as combinations_w_r)
import matplotlib
import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
import matplotlib.pyplot as plt
import pandas
from sklearn import config_context
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.utils._testing import ignore_warnings
from skl2onnx import convert_sklearn
@@ -134,7 +128,8 @@ def bench(n_obs, n_features, max_depths, n_estimatorss,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5)
assert_almost_equal(
p1.ravel(), p2.ravel(), decimal=5)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -163,7 +158,8 @@ def plot_results(df, verbose=False):
"Time (s) n_obs={}\nmax_depth={}".format(
n_obs, max_depth), fontsize='x-small')

for color, n_estimators in zip('brgyc', sorted(set(df.n_estimators))):
for color, n_estimators in zip(
'brgyc', sorted(set(df.n_estimators))):
subset = df[(df.n_obs == n_obs)
& (df.max_depth == max_depth)
& (df.n_estimators == n_estimators)]
@@ -174,14 +170,16 @@
print(subset)

label = "skl ne={}".format(n_estimators)
subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
logx=True, logy=True, c=color, style='--', lw=5)
subset.plot(
x="nfeat", y="time_skl", label=label, ax=a,
logx=True, logy=True, c=color, style='--', lw=5)
label = "ort ne={}".format(n_estimators)
subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
logx=True, logy=True, c=color, lw=3)
label = "lite ne={}".format(n_estimators)
subset.plot(x="nfeat", y="time_lite", label=label, ax=a,
logx=True, logy=True, c=color, style='-.', lw=3)
subset.plot(
x="nfeat", y="time_lite", label=label, ax=a,
logx=True, logy=True, c=color, style='-.', lw=3)

a.legend(loc=0, fontsize='x-small')
if row == 0:
@@ -209,7 +207,7 @@ def run_bench(repeat=100, verbose=False):
print("Total time = %0.3f sec\n" % (end - start))

# plot the results
#plot_results(results_df, verbose=verbose)
# plot_results(results_df, verbose=verbose)
return results_df


17 changes: 8 additions & 9 deletions benchmarks/bench_plot_onnxruntime_linreg.py
@@ -4,14 +4,9 @@
"""
Benchmark of onnxruntime on LinearRegression.
"""
import matplotlib

import warnings
from io import BytesIO
from time import perf_counter as time
from itertools import combinations, chain
from itertools import combinations_with_replacement as combinations_w_r

import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
@@ -91,7 +86,8 @@ def bench(n_obs, n_features, fit_intercepts, methods,

fct1, fct2 = fcts[method]

if not allow_configuration(n=n, nfeat=nfeat, fit_intercept=fit_intercept):
if not allow_configuration(
n=n, nfeat=nfeat, fit_intercept=fit_intercept):
continue

obs = dict(n_obs=n, nfeat=nfeat,
@@ -132,7 +128,8 @@ def bench(n_obs, n_features, fit_intercepts, methods,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5)
assert_almost_equal(
p1.ravel(), p2.ravel(), decimal=5)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -157,8 +154,10 @@ def plot_results(df, verbose=False):
if row == ax.shape[0] - 1:
a.set_xlabel("N features", fontsize='x-small')
if pos == 0:
a.set_ylabel("Time (s) n_obs={}\nfit_intercept={}".format(n_obs, fit_intercept),
fontsize='x-small')
a.set_ylabel(
"Time (s) n_obs={}\nfit_intercept={}".format(
n_obs, fit_intercept),
fontsize='x-small')

color = 'b'
subset = df[(df.method == method) & (df.n_obs == n_obs) &
14 changes: 6 additions & 8 deletions benchmarks/bench_plot_onnxruntime_logreg.py
@@ -5,13 +5,8 @@
Benchmark of onnxruntime on LogisticRegression.
"""
# Authors: Xavier Dupré (benchmark)
import matplotlib

from io import BytesIO
from time import perf_counter as time
from itertools import combinations, chain
from itertools import combinations_with_replacement as combinations_w_r

import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
@@ -102,7 +97,8 @@ def bench(n_obs, n_features, fit_intercepts, methods,

fct1, fct2 = fcts[method]

if not allow_configuration(n=n, nfeat=nfeat, fit_intercept=fit_intercept):
if not allow_configuration(
n=n, nfeat=nfeat, fit_intercept=fit_intercept):
continue

obs = dict(n_obs=n, nfeat=nfeat,
@@ -165,8 +161,10 @@ def plot_results(df, verbose=False):
if row == ax.shape[0] - 1:
a.set_xlabel("N features", fontsize='x-small')
if pos == 0:
a.set_ylabel("Time (s) n_obs={}\nfit_intercept={}".format(n_obs, fit_intercept),
fontsize='x-small')
a.set_ylabel(
"Time (s) n_obs={}\nfit_intercept={}".format(
n_obs, fit_intercept),
fontsize='x-small')

color = 'b'
subset = df[(df.method == method) & (df.n_obs == n_obs) &
17 changes: 9 additions & 8 deletions benchmarks/bench_plot_onnxruntime_random_forest.py
@@ -7,12 +7,9 @@
# Authors: Xavier Dupré (benchmark)
from io import BytesIO
from time import perf_counter as time
from itertools import combinations, chain
from itertools import combinations_with_replacement as combinations_w_r
import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
import matplotlib
import matplotlib.pyplot as plt
import pandas
from sklearn import config_context
@@ -101,8 +98,9 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, methods,

fct1, fct2 = fcts[method]

if not allow_configuration(n=n, nfeat=nfeat,
max_depth=max_depth, n_estimator=n_estimators):
if not allow_configuration(
n=n, nfeat=nfeat,
max_depth=max_depth, n_estimator=n_estimators):
continue

obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth,
@@ -168,10 +166,13 @@ def plot_results(df, verbose=False):
if row == ax.shape[0] - 1:
a.set_xlabel("N features", fontsize='x-small')
if pos == 0:
a.set_ylabel("Time (s) n_obs={}\nmax_depth={}".format(n_obs, max_depth),
fontsize='x-small')
a.set_ylabel(
"Time (s) n_obs={}\nmax_depth={}".format(
n_obs, max_depth),
fontsize='x-small')

for color, n_estimators in zip('brgyc', sorted(set(df.n_estimators))):
for color, n_estimators in zip(
'brgyc', sorted(set(df.n_estimators))):
subset = df[(df.method == method) & (df.n_obs == n_obs)
& (df.max_depth == max_depth)
& (df.n_estimators == n_estimators)]
59 changes: 33 additions & 26 deletions benchmarks/bench_plot_onnxruntime_random_forest_reg.py
@@ -8,10 +8,6 @@
import warnings
from io import BytesIO
from time import perf_counter as time
from itertools import combinations, chain
from itertools import combinations_with_replacement as combinations_w_r

import matplotlib
import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
@@ -49,10 +45,12 @@ def fcts_model(X, y, max_depth, n_estimators, n_jobs):
try:
lite = treelite.sklearn.import_model(rf)
name = "lite{}.dll".format(id(rf))
lite.export_lib(toolchain='msvc' if sys.platform == "win32" else "gcc",
libpath=name, verbose=False)
lite.export_lib(
toolchain='msvc' if sys.platform == "win32" else "gcc",
libpath=name, verbose=False)
lite_predictor = treelite_runtime.Predictor(name, verbose=False)
except (treelite.util.TreeliteError, PermissionError, UnicodeDecodeError):
except (treelite.util.TreeliteError, PermissionError,
UnicodeDecodeError):
lite_predictor = None

def predict_skl_predict(X, model=rf):
@@ -69,7 +67,7 @@ def predict_treelite_predict(X, sess=sess):
return {'predict': (
predict_skl_predict,
predict_onnxrt_predict,
None, # predict_treelite_predict if lite_predictor is not None else None
None,
)}


@@ -95,24 +93,28 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
for n_jobs in n_jobss:
for max_depth in max_depths:
for n_estimators in n_estimatorss:
fcts = fcts_model(X_train, y_train, max_depth, n_estimators, n_jobs)
fcts = fcts_model(X_train, y_train,
max_depth, n_estimators, n_jobs)

for n in n_obs:
for method in methods:

fct1, fct2, fct3 = fcts[method]

if not allow_configuration(n=n, nfeat=nfeat,
max_depth=max_depth,
n_estimator=n_estimators,
n_jobs=n_jobs, method=method):
if not allow_configuration(
n=n, nfeat=nfeat,
max_depth=max_depth,
n_estimator=n_estimators,
n_jobs=n_jobs, method=method):
continue

obs = dict(n_obs=n, nfeat=nfeat, max_depth=max_depth,
n_estimators=n_estimators, method=method,
n_jobs=n_jobs)
obs = dict(
n_obs=n, nfeat=nfeat, max_depth=max_depth,
n_estimators=n_estimators, method=method,
n_jobs=n_jobs)

# creates different inputs to avoid caching in any ways
# creates different inputs to avoid caching
# in any ways
Xs = []
for r in range(repeat):
x = np.empty((n, nfeat))
@@ -164,7 +166,8 @@ def bench(n_obs, n_features, max_depths, n_estimatorss, n_jobss,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
assert_almost_equal(p1.ravel(), p2.ravel(), decimal=5)
assert_almost_equal(
p1.ravel(), p2.ravel(), decimal=5)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -193,7 +196,8 @@ def plot_results(df, verbose=False):
"Time (s) n_obs={}\nmax_depth={} n_jobs={}".format(
n_obs, max_depth, n_jobs), fontsize='x-small')

for color, n_estimators in zip('brgyc', sorted(set(df.n_estimators))):
for color, n_estimators in zip(
'brgyc', sorted(set(df.n_estimators))):
subset = df[(df.n_jobs == n_jobs) & (df.n_obs == n_obs)
& (df.max_depth == max_depth)
& (df.n_estimators == n_estimators)]
@@ -204,14 +208,17 @@
print(subset)

label = "skl ne={}".format(n_estimators)
subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
logx=True, logy=True, c=color, style='--', lw=5)
subset.plot(
x="nfeat", y="time_skl", label=label, ax=a,
logx=True, logy=True, c=color, style='--', lw=5)
label = "ort ne={}".format(n_estimators)
subset.plot(x="nfeat", y="time_ort", label=label, ax=a,
logx=True, logy=True, c=color, lw=3)
subset.plot(
x="nfeat", y="time_ort", label=label, ax=a,
logx=True, logy=True, c=color, lw=3)
label = "lite ne={}".format(n_estimators)
subset.plot(x="nfeat", y="time_lite", label=label, ax=a,
logx=True, logy=True, c=color, style='-.', lw=3)
subset.plot(
x="nfeat", y="time_lite", label=label, ax=a,
logx=True, logy=True, c=color, style='-.', lw=3)

a.legend(loc=0, fontsize='x-small')
if row == 0:
@@ -240,7 +247,7 @@ def run_bench(repeat=100, verbose=False):
print("Total time = %0.3f sec\n" % (end - start))

# plot the results
#plot_results(results_df, verbose=verbose)
# plot_results(results_df, verbose=verbose)
return results_df


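The bench() loops above draw a fresh random batch for every repetition ('creates different inputs to avoid caching in any ways'), so no timed call can reuse a previously computed result. A condensed sketch of that idea, with an assumed helper name and dtype:

    # Condensed sketch of the fresh-inputs idea used by bench(); the helper
    # name and dtype are assumptions.
    import numpy as np
    from numpy.random import rand

    def make_fresh_batches(repeat, n, nfeat):
        """Return `repeat` independent random batches of shape (n, nfeat)."""
        Xs = []
        for _ in range(repeat):
            x = np.empty((n, nfeat), dtype=np.float32)
            x[:, :] = rand(n, nfeat)[:, :]
            Xs.append(x)
        return Xs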
9 changes: 3 additions & 6 deletions benchmarks/bench_plot_onnxruntime_svm_reg.py
@@ -5,14 +5,10 @@
Benchmark of onnxruntime on SVM.
"""
# Authors: Xavier Dupré (benchmark)
import sys
import warnings
from io import BytesIO
from time import perf_counter as time
from itertools import combinations, chain
from itertools import combinations_with_replacement as combinations_w_r

import matplotlib
import numpy as np
from numpy.random import rand
from numpy.testing import assert_almost_equal
@@ -128,7 +124,8 @@ def bench(n_obs, n_features, kernels,
if len(p1.shape) == 1 and len(p2.shape) == 2:
p2 = p2.ravel()
try:
assert_almost_equal(p1.ravel(), p2.ravel(), decimal=3)
assert_almost_equal(
p1.ravel(), p2.ravel(), decimal=3)
except AssertionError as e:
warnings.warn(str(e))
return res
@@ -161,7 +158,7 @@ def plot_results(df, verbose=False):
if verbose:
print(subset)

label="skl %s" % kernel
label = "skl %s" % kernel
subset.plot(x="nfeat", y="time_skl", label=label, ax=a,
logx=True, logy=True, c=color, style='--', lw=5)
label = "ort %s" % kernel