Renames n_features_ into n_features_in_ for scikit-learn 1.2 (#953)
* Renames n_features_ into n_features_in_ for scikit-learn 1.2

Signed-off-by: xadupre <xadupre@microsoft.com>

* update requirements

Signed-off-by: xadupre <xadupre@microsoft.com>

* add CI run against the onnx package published on test.pypi

Signed-off-by: xadupre <xadupre@microsoft.com>

* lint

Signed-off-by: xadupre <xadupre@microsoft.com>

* fix future warnings

Signed-off-by: xadupre <xadupre@microsoft.com>

* fix shape calculator for KernelPCA

Signed-off-by: xadupre <xadupre@microsoft.com>

* remove load_boston

Signed-off-by: xadupre <xadupre@microsoft.com>

* fix example

Signed-off-by: xadupre <xadupre@microsoft.com>

* disable unit test for old skl

Signed-off-by: xadupre <xadupre@microsoft.com>

* lint

Signed-off-by: xadupre <xadupre@microsoft.com>

Signed-off-by: xadupre <xadupre@microsoft.com>
xadupre authored Dec 9, 2022
1 parent 81fd481 commit 4da35da
Showing 22 changed files with 114 additions and 81 deletions.
23 changes: 18 additions & 5 deletions .azure-pipelines/linux-conda-CI.yml
@@ -14,15 +14,28 @@ jobs:
strategy:
matrix:

Py310-Onnx120-Rt131-Skl11:
Py310-OnnxTest-Rt131-Skl12:
do.bench: '0'
python.version: '3.10'
numpy.version: '>=1.21.0'
scipy.version: '>=1.7.0'
onnx.version: '-i https://test.pypi.org/simple/ onnx==1.13.0rc1'
onnx.target_opset: ''
onnxrt.version: 'onnxruntime==1.13.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
sklearn.version: '>=1.2'
lgbm.version: ''
onnxcc.version: '>=1.8.1' # git
run.example: '1'

Py310-Onnx120-Rt131-Skl12:
do.bench: '0'
python.version: '3.10'
numpy.version: '>=1.21.0'
scipy.version: '>=1.7.0'
onnx.version: 'onnx==1.12.0' #'-i https://test.pypi.org/simple/ onnx==1.12.0rc4'
onnx.target_opset: ''
onnxrt.version: 'onnxruntime==1.13.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
sklearn.version: '>=1.1.3'
sklearn.version: '>=1.2'
lgbm.version: ''
onnxcc.version: '>=1.8.1' # git
run.example: '1'
@@ -34,7 +47,7 @@ jobs:
onnx.version: 'onnx==1.12.0' #'-i https://test.pypi.org/simple/ onnx==1.12.0rc4'
onnx.target_opset: ''
onnxrt.version: 'onnxruntime==1.12.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
sklearn.version: '>=1.1.3'
sklearn.version: '==1.1.3'
lgbm.version: ''
onnxcc.version: '>=1.8.1' # git
run.example: '0'
@@ -46,7 +59,7 @@ jobs:
onnx.version: 'onnx==1.12.0' #'-i https://test.pypi.org/simple/ onnx==1.12.0rc4'
onnx.target_opset: ''
onnxrt.version: 'onnxruntime==1.11.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
sklearn.version: '>=1.1'
sklearn.version: '==1.1.3'
lgbm.version: ''
onnxcc.version: '>=1.8.1' # git
run.example: '0'
@@ -58,7 +71,7 @@ jobs:
onnx.version: 'onnx==1.11.0' # '-i https://test.pypi.org/simple/ onnx==1.11.0rc2'
onnx.target_opset: ''
onnxrt.version: 'onnxruntime==1.11.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
sklearn.version: '>=1.1'
sklearn.version: '==1.1.3'
lgbm.version: ''
onnxcc.version: '>=1.8.1' # git
run.example: '0'
10 changes: 5 additions & 5 deletions .azure-pipelines/win32-conda-CI.yml
@@ -13,15 +13,15 @@ jobs:
vmImage: 'windows-latest'
strategy:
matrix:
Py310-Onnx120-Rt131-Skl11:
Py310-Onnx120-Rt131-Skl12:
python.version: '3.10'
onnx.version: 'onnx==1.12.0' #'-i https://test.pypi.org/simple/ onnx==1.12.0rc4'
onnx.target_opset: ''
numpy.version: 'numpy>=1.22.3'
scipy.version: 'scipy'
onnxrt.version: 'onnxruntime==1.13.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
onnxcc.version: 'onnxconverter-common>=1.8.1' # git+https://github.com/microsoft/onnxconverter-common.git
sklearn.version: '>=1.1'
sklearn.version: '>=1.2'
Py310-Onnx120-Rt120-Skl11:
python.version: '3.10'
onnx.version: 'onnx==1.12.0' #'-i https://test.pypi.org/simple/ onnx==1.12.0rc4'
@@ -30,7 +30,7 @@ jobs:
scipy.version: 'scipy'
onnxrt.version: 'onnxruntime==1.12.0' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
onnxcc.version: 'onnxconverter-common>=1.8.1' # git+https://github.com/microsoft/onnxconverter-common.git
sklearn.version: '>=1.1'
sklearn.version: '==1.1.3'
Py39-Onnx120-Rt1111-Skl11:
python.version: '3.9'
onnx.version: 'onnx==1.12.0' #'-i https://test.pypi.org/simple/ onnx==1.12.0rc4'
@@ -39,7 +39,7 @@ jobs:
scipy.version: 'scipy'
onnxrt.version: 'onnxruntime==1.11.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
onnxcc.version: 'onnxconverter-common>=1.8.1' # git+https://github.com/microsoft/onnxconverter-common.git
sklearn.version: '>=1.1'
sklearn.version: '==1.1.3'
Py39-Onnx1110-Rt1111-Skl11:
python.version: '3.9'
onnx.version: 'onnx==1.11.0' # '-i https://test.pypi.org/simple/ onnx==1.11.0rc2'
@@ -48,7 +48,7 @@ jobs:
scipy.version: 'scipy'
onnxrt.version: 'onnxruntime==1.11.1' # -i https://test.pypi.org/simple/ ort-nightly==1.11.0.dev20220311003
onnxcc.version: 'onnxconverter-common>=1.8.1' # git+https://github.com/microsoft/onnxconverter-common.git
sklearn.version: '>=1.1'
sklearn.version: '==1.1.3'
Py39-Onnx1110-Rt1111-Skl10:
python.version: '3.9'
onnx.version: 'onnx==1.11.0' # '-i https://test.pypi.org/simple/ onnx==1.11.0rc2'
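The matrices above pin the older jobs to scikit-learn 1.1.3 and add jobs for 1.2; the commit message also mentions disabling a unit test for older scikit-learn. A hedged sketch of how such a version gate can look in a test file (illustrative only, not taken from this diff; it assumes the `packaging` library is available for the version comparison):

```python
# Illustrative version gate, not part of this commit's diff.
import unittest

import numpy as np
import packaging.version as pv
import sklearn
from sklearn.tree import DecisionTreeRegressor


class TestNFeaturesInRename(unittest.TestCase):

    @unittest.skipIf(
        pv.Version(sklearn.__version__) < pv.Version("1.2"),
        "the deprecated n_features_ alias is only removed in scikit-learn 1.2")
    def test_n_features_in_only(self):
        model = DecisionTreeRegressor().fit(np.eye(4), [0.0, 1.0, 0.0, 1.0])
        self.assertEqual(model.n_features_in_, 4)
        # The deprecated alias is gone in 1.2 and later.
        self.assertFalse(hasattr(model, "n_features_"))


if __name__ == "__main__":
    unittest.main()
```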
6 changes: 3 additions & 3 deletions docs/examples/plot_gpr.py
@@ -27,7 +27,7 @@
import pprint
import numpy
import sklearn
from sklearn.datasets import load_boston
from sklearn.datasets import load_diabetes
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, RBF
from sklearn.model_selection import train_test_split
@@ -37,8 +37,8 @@
from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType
from skl2onnx import convert_sklearn

bost = load_boston()
X, y = bost.data, bost.target
dataset = load_diabetes()
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
gpr = GaussianProcessRegressor(DotProduct() + RBF(), alpha=1.)
gpr.fit(X_train, y_train)
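For context, load_boston was removed in scikit-learn 1.2, so the example switches to the diabetes dataset. A self-contained excerpt of the updated setup (a trimmed sketch assuming the rest of the script is unchanged):

```python
# Trimmed excerpt of the updated example: load_boston is gone in
# scikit-learn 1.2, so load_diabetes serves as a drop-in regression dataset.
from sklearn.datasets import load_diabetes
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, RBF
from sklearn.model_selection import train_test_split
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

dataset = load_diabetes()
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

gpr = GaussianProcessRegressor(DotProduct() + RBF(), alpha=1.)
gpr.fit(X_train, y_train)

# Conversion call mirroring the original script.
onx = convert_sklearn(
    gpr, initial_types=[('X', FloatTensorType([None, X.shape[1]]))])
```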
5 changes: 1 addition & 4 deletions docs/tutorial/plot_ebegin_float_double.py
@@ -53,10 +53,8 @@
the discord areas.
"""
from mlprodict.sklapi import OnnxPipeline
from skl2onnx.sklapi import CastTransformer, CastRegressor
from skl2onnx.sklapi import CastTransformer
from skl2onnx import to_onnx
from mlprodict.onnx_conv import to_onnx as to_onnx_extended
from mlprodict.onnxrt import OnnxInference
from onnxruntime import InferenceSession
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
@@ -291,4 +289,3 @@ def diff(p1, p2):
ort4 = sess4.run(None, {'X': X32})[0]

print(diff(skl4, ort4))

6 changes: 4 additions & 2 deletions docs/tutorial/plot_usparse_xgboost.py
@@ -77,6 +77,7 @@
y = data.target

df = pandas.DataFrame(X)
df.columns = [f"c{c}" for c in df.columns]
df["text"] = [cst[i] for i in y]


@@ -146,7 +147,8 @@ def make_pipelines(df_train, y_train, models=None,
try:
pipe.fit(df_train, y_train)
except TypeError as e:
obs = dict(model=model.__name__, pipe=pipe, error=e)
obs = dict(model=model.__name__, pipe=pipe, error=e,
model_onnx=None)
pipes.append(obs)
continue

@@ -168,7 +170,7 @@ def make_pipelines(df_train, y_train, models=None,
f.write(model_onnx.SerializeToString())

sess = rt.InferenceSession(model_onnx.SerializeToString())
inputs = {"input": df[[0, 1]].values.astype(numpy.float32),
inputs = {"input": df[["c0", "c1"]].values.astype(numpy.float32),
"text": df[["text"]].values}
pred_onx = sess.run(None, inputs)

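The point of the column change above: newer scikit-learn rejects DataFrames whose column names mix integers and strings, and string names also make the ONNX input selection explicit. A small illustrative sketch of the renaming and input construction (data values are made up, not from the example):

```python
# Illustrative sketch, not part of the diff: name every column so the
# DataFrame no longer mixes integer and string labels, then select the
# numeric features by name when building the runtime inputs.
import numpy
import pandas

X = numpy.random.randn(5, 2)
df = pandas.DataFrame(X)
df.columns = [f"c{c}" for c in df.columns]      # 0, 1 -> "c0", "c1"
df["text"] = ["spam", "ham", "spam", "ham", "ham"]

inputs = {
    "input": df[["c0", "c1"]].values.astype(numpy.float32),
    "text": df[["text"]].values,
}
print({k: v.shape for k, v in inputs.items()})  # {'input': (5, 2), 'text': (5, 1)}
```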
7 changes: 2 additions & 5 deletions requirements-dev.txt
@@ -1,14 +1,11 @@
flatbuffers
nose
numpy
protobuf
# tests
codecov
flatbuffers
pandas
py-cpuinfo
pybind11
pytest
pytest-cov
scipy
wheel

# docs
5 changes: 1 addition & 4 deletions requirements.txt
@@ -1,6 +1,3 @@
numpy>=1.15
scipy>=1.0
protobuf
onnx>=1.2.1
scikit-learn>=0.19, <1.2
scikit-learn>=0.19, <1.3
onnxconverter-common>=1.7.0
10 changes: 0 additions & 10 deletions skl2onnx/algebra/onnx_operator_mixin.py
@@ -70,16 +70,6 @@ def infer_initial_types(self):
"and initial_types are not defined.")

def _find_sklearn_parent(self):
if (hasattr(self.__class__, 'predict') and
"predict" in self.__class__.__dict__):
raise RuntimeError("Method predict was modified. "
"There is no parser or converter available "
"for class '{}'.".format(self.__class__))
if (hasattr(self.__class__, 'transform') and
"transform" in self.__class__.__dict__):
raise RuntimeError("Method transform was modified. "
"There is no parser or converter available "
"for class '{}'.".format(self.__class__))
for cl in self.__class__.__bases__:
if issubclass(cl, BaseEstimator):
return cl
5 changes: 4 additions & 1 deletion skl2onnx/common/utils_sklearn.py
@@ -2,6 +2,7 @@

import copy
from collections import OrderedDict
import warnings
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline

@@ -48,7 +49,9 @@ def enumerate_model_names(model, prefix="", short=True):
(key.endswith("_") and not key.endswith("__") and
not key.startswith('_'))):
try:
obj = getattr(model, key)
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
obj = getattr(model, key)
except AttributeError:
continue
if (hasattr(obj, 'get_params') and
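Isolated, the guard added to enumerate_model_names looks like this: reading deprecated trailing-underscore attributes such as n_features_ triggers a FutureWarning on scikit-learn 1.1, so the lookup is wrapped in a catch_warnings block. A hedged standalone sketch of the pattern (the helper name is not from the codebase):

```python
import warnings


def quiet_getattr(model, key):
    # Read a possibly deprecated attribute without letting scikit-learn's
    # FutureWarning bubble up to the caller; return None if it is absent.
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            return getattr(model, key)
    except AttributeError:
        return None
```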
13 changes: 9 additions & 4 deletions skl2onnx/operator_converters/bagging.py
@@ -40,8 +40,10 @@ def _calculate_proba(scope, operator, container, model):
'proba_%d' % index, operator.inputs[0].type.__class__())

features = model.estimators_features_[index]
if (len(features) == model.n_features_ and
list(features) == list(range(model.n_features_))):
n_features = (model.n_features_in_ if hasattr(model, 'n_features_in_')
else model.n_features_)
if (len(features) == n_features and
list(features) == list(range(n_features))):
this_operator.inputs = operator.inputs
else:
# subset of features
@@ -181,8 +183,11 @@ def convert_sklearn_bagging_regressor(scope: Scope, operator: Operator,
this_operator = scope.declare_local_operator(op_type, estimator)

features = bagging_op.estimators_features_[index]
if (len(features) == bagging_op.n_features_ and
list(features) == list(range(bagging_op.n_features_))):
n_features = (bagging_op.n_features_in_
if hasattr(bagging_op, 'n_features_in_')
else bagging_op.n_features_)
if (len(features) == n_features and
list(features) == list(range(n_features))):
this_operator.inputs = operator.inputs
else:
# subset of features
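The same fallback recurs in several converters (bagging here, naive Bayes and polynomial features below): prefer n_features_in_ and only fall back to the removed n_features_ on older scikit-learn. A hypothetical helper capturing that pattern (skl2onnx inlines the expression instead of defining such a function):

```python
import numpy as np
from sklearn.ensemble import BaggingRegressor


def get_n_features(model):
    # scikit-learn 1.2 removed n_features_; n_features_in_ is the
    # replacement and has existed since 0.24.
    if hasattr(model, "n_features_in_"):
        return model.n_features_in_
    return model.n_features_


# Quick illustrative check with a fitted estimator.
model = BaggingRegressor(n_estimators=3).fit(
    np.random.randn(20, 3), np.random.randn(20))
assert get_n_features(model) == 3
```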
3 changes: 2 additions & 1 deletion skl2onnx/operator_converters/calibrated_classifier_cv.py
@@ -293,7 +293,8 @@ def convert_calibrated_classifier_base_estimator(scope, operator, container,
if dtype != np.float64:
dtype = np.float32

base_model = model.base_estimator
base_model = (model.estimator if hasattr(model, 'estimator')
else model.base_estimator)
op_type = sklearn_operator_name_map[type(base_model)]
n_classes = (len(model.classes_) if hasattr(model, 'classes_') else
len(base_model.classes_))
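CalibratedClassifierCV renamed its base_estimator argument to estimator in scikit-learn 1.2, which is why the converter probes for the new attribute first. A short sketch restating that lookup with an illustrative usage (the helper name is hypothetical):

```python
import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression


def get_base_model(calibrated):
    # scikit-learn 1.2 renames base_estimator to estimator;
    # prefer the new attribute when it exists.
    if hasattr(calibrated, "estimator"):
        return calibrated.estimator
    return calibrated.base_estimator


X = np.random.randn(30, 2)
y = np.arange(30) % 2
cal = CalibratedClassifierCV(LogisticRegression()).fit(X, y)
print(type(get_base_model(cal)).__name__)   # LogisticRegression
```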
11 changes: 7 additions & 4 deletions skl2onnx/operator_converters/gradient_boosting.py
@@ -31,14 +31,16 @@ def convert_sklearn_gradient_boosting_classifier(

transform = 'LOGISTIC' if op.n_classes_ == 2 else 'SOFTMAX'
if op.init == 'zero':
base_values = np.zeros(op.loss_.K)
loss = op._loss if hasattr(op, "_loss") else op.loss_
base_values = np.zeros(loss.K)
elif op.init is None:
if hasattr(op.estimators_[0, 0], 'n_features_in_'):
# sklearn >= 1.2
x0 = np.zeros((1, op.estimators_[0, 0].n_features_in_))
n_features = op.estimators_[0, 0].n_features_in_
else:
# sklearn < 1.2
x0 = np.zeros((1, op.estimators_[0, 0].n_features_))
n_features = op.estimators_[0, 0].n_features_
x0 = np.zeros((1, n_features))
if hasattr(op, '_raw_predict_init'):
# sklearn >= 0.21
base_values = op._raw_predict_init(x0).ravel()
@@ -114,7 +116,8 @@ def convert_sklearn_gradient_boosting_regressor(
attrs['n_targets'] = 1

if op.init == 'zero':
cst = np.zeros(op.loss_.K)
loss = op._loss if hasattr(op, "_loss") else op.loss_
cst = np.zeros(loss.K)
elif op.init is None:
# constant_ was introduced in scikit-learn 0.21.
if hasattr(op.init_, 'constant_'):
2 changes: 1 addition & 1 deletion skl2onnx/operator_converters/linear_regressor.py
@@ -84,7 +84,7 @@ def convert_sklearn_bayesian_ridge(scope: Scope, operator: Operator,
return

proto_dtype = guess_proto_type(operator.inputs[0].type)
if op.normalize:
if hasattr(op, 'normalize') and op.normalize:
# if self.normalize:
# X = (X - self.X_offset_) / self.X_scale_
offset = scope.get_unique_variable_name('offset')
11 changes: 8 additions & 3 deletions skl2onnx/operator_converters/naive_bayes.py
@@ -99,7 +99,8 @@ def _joint_log_likelihood_gaussian(
"""
features = model.theta_.shape[1]
jointi = np.log(model.class_prior_)
sigma_sum_log = - 0.5 * np.sum(np.log(2. * np.pi * model.sigma_), axis=1)
var_sigma = model.var_ if hasattr(model, 'var_') else model.sigma_
sigma_sum_log = - 0.5 * np.sum(np.log(2. * np.pi * var_sigma), axis=1)
theta_name = scope.get_unique_variable_name('theta')
sigma_name = scope.get_unique_variable_name('sigma')
sigma_sum_log_name = scope.get_unique_variable_name('sigma_sum_log')
@@ -116,7 +117,7 @@
'part_log_likelihood')

theta = model.theta_.reshape((1, -1, features))
sigma = model.sigma_.reshape((1, -1, features))
sigma = var_sigma.reshape((1, -1, features))

container.add_initializer(theta_name, proto_dtype, theta.shape,
theta.ravel())
@@ -172,7 +173,11 @@ def _joint_log_likelihood_categorical(
class_log_prior_name, onnx_proto.TensorProto.FLOAT,
model.class_log_prior_.shape, model.class_log_prior_)

for i in range(model.n_features_):
n_features = (model.n_features_in_
if hasattr(model, 'n_features_in_')
else model.n_features_)

for i in range(n_features):
feature_index_name = scope.get_unique_variable_name('feature_index')
indices_name = scope.get_unique_variable_name('indices')
cast_indices_name = scope.get_unique_variable_name('cast_indices')
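GaussianNB is another rename handled here: sigma_ became var_ in scikit-learn 1.0 and the old name was removed in 1.2. A quick illustrative check of the fallback (not from the diff):

```python
import numpy as np
from sklearn.naive_bayes import GaussianNB

X = np.random.randn(20, 4)
y = np.arange(20) % 2
nb = GaussianNB().fit(X, y)

# Per-class feature variances under either attribute name.
variances = nb.var_ if hasattr(nb, "var_") else nb.sigma_
assert variances.shape == (len(nb.classes_), X.shape[1])   # (2, 4)
```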
16 changes: 8 additions & 8 deletions skl2onnx/operator_converters/polynomial_features.py
@@ -15,17 +15,17 @@ def convert_sklearn_polynomial_features(scope: Scope, operator: Operator,
op = operator.raw_operator
transformed_columns = [None] * (op.n_output_features_)

n_features = (op.n_features_in_
if hasattr(op, 'n_features_in_')
else op.n_features_)
if hasattr(op, '_min_degree'):
# scikit-learn >= 1.0
combinations = op._combinations(op.n_input_features_,
op._min_degree,
op._max_degree,
op.interaction_only,
op.include_bias)
combinations = op._combinations(
n_features, op._min_degree, op._max_degree, op.interaction_only,
op.include_bias)
else:
combinations = op._combinations(op.n_input_features_, op.degree,
op.interaction_only,
op.include_bias)
combinations = op._combinations(
n_features, op.degree, op.interaction_only, op.include_bias)

unit_name = None
last_feat = None
4 changes: 3 additions & 1 deletion skl2onnx/shape_calculators/kernel_pca.py
@@ -16,7 +16,9 @@ def calculate_sklearn_kernel_pca_output_shapes(operator):
operator, good_input_types=[FloatTensorType, DoubleTensorType],
good_output_types=[FloatTensorType, DoubleTensorType])
N = operator.inputs[0].get_first_dimension()
C = operator.raw_operator.lambdas_.shape[0]
op = operator.raw_operator
lbd = op.eigenvalues_ if hasattr(op, 'eigenvalues_') else op.lambdas_
C = lbd.shape[0]
operator.outputs[0].type.shape = [N, C]


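KernelPCA renamed lambdas_ to eigenvalues_ (the old attribute was removed in scikit-learn 1.2), and the number of eigenvalues determines the width of the transformed output, hence the [N, C] shape above. A hedged standalone check (illustrative, assumes scikit-learn >= 1.0 where eigenvalues_ exists):

```python
import numpy as np
from sklearn.decomposition import KernelPCA

X = np.random.randn(30, 5)
kpca = KernelPCA(n_components=3).fit(X)

eig = (kpca.eigenvalues_ if hasattr(kpca, "eigenvalues_")
       else kpca.lambdas_)
# The ONNX output shape is [N, C] where C is the number of eigenvalues.
assert kpca.transform(X).shape == (X.shape[0], eig.shape[0])
```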