From 6954f891dcc66e74f82b5fc818f4d15209636236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 27 Jul 2021 17:38:15 +0200 Subject: [PATCH 1/6] Fix bug with SubEstimator not directly connected to the inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/common/_topology.py | 10 + skl2onnx/operator_converters/scaler_op.py | 242 +++++++++--------- ...st_algebra_onnx_operators_sub_estimator.py | 80 +++++- 3 files changed, 207 insertions(+), 125 deletions(-) diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py index 8591365d1..a8bd1f47b 100644 --- a/skl2onnx/common/_topology.py +++ b/skl2onnx/common/_topology.py @@ -772,6 +772,16 @@ def topological_operator_iterator(self): # least one operator should be evaluated. If not, we need # to terminate this procedure to avoid dead lock. if not is_evaluation_happened: + for op in self.unordered_operator_iterator(): + if not op.is_evaluated and op.raw_operator is not None: + raise RuntimeError( + "One operator was not evaluated (" + "inputs fed=%r, outputs fed=%r, op=%r)." % ( + all(variable.is_fed + for variable in operator.inputs), + all(variable.is_fed + for variable in operator.outputs), + op)) break def _check_structure(self): diff --git a/skl2onnx/operator_converters/scaler_op.py b/skl2onnx/operator_converters/scaler_op.py index baf81b16a..1a04040de 100644 --- a/skl2onnx/operator_converters/scaler_op.py +++ b/skl2onnx/operator_converters/scaler_op.py @@ -1,121 +1,121 @@ -# SPDX-License-Identifier: Apache-2.0 - - -import numpy as np -from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler -from sklearn.preprocessing import RobustScaler, StandardScaler -from ..algebra.onnx_ops import OnnxSub, OnnxDiv, OnnxCast -from ..common._registration import register_converter -from ..common._topology import Scope, Operator -from ..common._container import ModelComponentContainer -from ..common.data_types import guess_numpy_type, guess_proto_type -from ..proto import onnx_proto -from .common import concatenate_variables - - -def convert_sklearn_scaler(scope: Scope, operator: Operator, - container: ModelComponentContainer): - # If there are multiple input variables, we need to combine them as a - # whole tensor. Integer(s) would be converted to float(s). - # Options div use true division instead of Scaler operator - # which replaces a division by a multiplication. - # This leads to discrepencies in some cases. - if len(operator.inputs) > 1: - feature_name = concatenate_variables(scope, operator.inputs, container) - else: - feature_name = operator.inputs[0].full_name - - op = operator.raw_operator - op_type = 'Scaler' - attrs = {'name': scope.get_unique_operator_name(op_type)} - - if isinstance(op, StandardScaler): - C = (operator.inputs[0].type.shape[1] - if len(operator.inputs[0].type.shape) == 2 else 1) - attrs['offset'] = op.mean_ if op.with_mean else [0.0] * C - attrs['scale'] = 1.0 / op.scale_ if op.with_std else [1.0] * C - inv_scale = op.scale_ if op.with_std else None - elif isinstance(op, RobustScaler): - C = (operator.inputs[0].type.shape[1] - if len(operator.inputs[0].type.shape) == 2 else 1) - attrs['offset'] = op.center_ if op.with_centering else [0.0] * C - attrs['scale'] = 1.0 / op.scale_ if op.with_scaling else [1.0] * C - inv_scale = op.scale_ if op.with_scaling else None - elif isinstance(op, MinMaxScaler): - attrs['scale'] = op.scale_ - # Add 1e-8 to avoid divided by 0 - attrs['offset'] = -op.min_/(op.scale_ + 1e-8) - inv_scale = None - elif isinstance(op, MaxAbsScaler): - C = (operator.inputs[0].type.shape[1] - if len(operator.inputs[0].type.shape) == 2 else 1) - attrs['scale'] = 1.0 / op.scale_ - attrs['offset'] = [0.] * C - inv_scale = op.scale_ - else: - raise ValueError('Only scikit-learn StandardScaler and RobustScaler ' - 'are supported but got %s. You may raise ' - 'an issue at ' - 'https://github.com/onnx/sklearn-onnx/issues.' - '' % type(op)) - - proto_dtype = guess_proto_type(operator.inputs[0].type) - if proto_dtype != onnx_proto.TensorProto.DOUBLE: - proto_dtype = onnx_proto.TensorProto.FLOAT - - dtype = guess_numpy_type(operator.inputs[0].type) - if dtype != np.float64: - dtype = np.float32 - for k in attrs: - v = attrs[k] - if isinstance(v, np.ndarray) and v.dtype != dtype: - attrs[k] = v.astype(dtype) - - if dtype == np.float64: - opv = container.target_opset - sub = OnnxSub( - feature_name, attrs['offset'].astype(dtype), - op_version=opv) - div = OnnxDiv(sub, inv_scale.astype(dtype), - op_version=opv, - output_names=[operator.outputs[0].full_name]) - div.add_to(scope, container) - return - - if inv_scale is not None: - options = container.get_options(op, dict(div='std')) - div = options['div'] - if div == 'div': - opv = container.target_opset - sub = OnnxSub( - feature_name, attrs['offset'].astype(dtype), - op_version=opv) - div = OnnxDiv(sub, inv_scale.astype(dtype), - op_version=opv, - output_names=[operator.outputs[0].full_name]) - div.add_to(scope, container) - return - if div == 'div_cast': - opv = container.target_opset - cast = OnnxCast(feature_name, to=onnx_proto.TensorProto.DOUBLE, - op_version=opv) - sub = OnnxSub(cast, attrs['offset'].astype(np.float64), - op_version=opv) - div = OnnxDiv(sub, inv_scale.astype(np.float64), op_version=opv) - cast = OnnxCast(div, to=proto_dtype, op_version=opv, - output_names=[operator.outputs[0].full_name]) - cast.add_to(scope, container) - return - - container.add_node( - op_type, feature_name, operator.outputs[0].full_name, - op_domain='ai.onnx.ml', **attrs) - - -register_converter('SklearnRobustScaler', convert_sklearn_scaler, - options={'div': ['std', 'div', 'div_cast']}) -register_converter('SklearnScaler', convert_sklearn_scaler, - options={'div': ['std', 'div', 'div_cast']}) -register_converter('SklearnMinMaxScaler', convert_sklearn_scaler) -register_converter('SklearnMaxAbsScaler', convert_sklearn_scaler, - options={'div': ['std', 'div', 'div_cast']}) +# SPDX-License-Identifier: Apache-2.0 + + +import numpy as np +from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler +from sklearn.preprocessing import RobustScaler, StandardScaler +from ..algebra.onnx_ops import OnnxSub, OnnxDiv, OnnxCast +from ..common._registration import register_converter +from ..common._topology import Scope, Operator +from ..common._container import ModelComponentContainer +from ..common.data_types import guess_numpy_type, guess_proto_type +from ..proto import onnx_proto +from .common import concatenate_variables + + +def convert_sklearn_scaler(scope: Scope, operator: Operator, + container: ModelComponentContainer): + # If there are multiple input variables, we need to combine them as a + # whole tensor. Integer(s) would be converted to float(s). + # Options div use true division instead of Scaler operator + # which replaces a division by a multiplication. + # This leads to discrepencies in some cases. + if len(operator.inputs) > 1: + feature_name = concatenate_variables(scope, operator.inputs, container) + else: + feature_name = operator.inputs[0].full_name + + op = operator.raw_operator + op_type = 'Scaler' + attrs = {'name': scope.get_unique_operator_name(op_type)} + + if isinstance(op, StandardScaler): + C = (operator.inputs[0].type.shape[1] + if len(operator.inputs[0].type.shape) == 2 else 1) + attrs['offset'] = op.mean_ if op.with_mean else [0.0] * C + attrs['scale'] = 1.0 / op.scale_ if op.with_std else [1.0] * C + inv_scale = op.scale_ if op.with_std else None + elif isinstance(op, RobustScaler): + C = (operator.inputs[0].type.shape[1] + if len(operator.inputs[0].type.shape) == 2 else 1) + attrs['offset'] = op.center_ if op.with_centering else [0.0] * C + attrs['scale'] = 1.0 / op.scale_ if op.with_scaling else [1.0] * C + inv_scale = op.scale_ if op.with_scaling else None + elif isinstance(op, MinMaxScaler): + attrs['scale'] = op.scale_ + # Add 1e-8 to avoid divided by 0 + attrs['offset'] = -op.min_/(op.scale_ + 1e-8) + inv_scale = None + elif isinstance(op, MaxAbsScaler): + C = (operator.inputs[0].type.shape[1] + if len(operator.inputs[0].type.shape) == 2 else 1) + attrs['scale'] = 1.0 / op.scale_ + attrs['offset'] = [0.] * C + inv_scale = op.scale_ + else: + raise ValueError('Only scikit-learn StandardScaler and RobustScaler ' + 'are supported but got %s. You may raise ' + 'an issue at ' + 'https://github.com/onnx/sklearn-onnx/issues.' + '' % type(op)) + + proto_dtype = guess_proto_type(operator.inputs[0].type) + if proto_dtype != onnx_proto.TensorProto.DOUBLE: + proto_dtype = onnx_proto.TensorProto.FLOAT + + dtype = guess_numpy_type(operator.inputs[0].type) + if dtype != np.float64: + dtype = np.float32 + for k in attrs: + v = attrs[k] + if isinstance(v, np.ndarray) and v.dtype != dtype: + attrs[k] = v.astype(dtype) + + if dtype == np.float64: + opv = container.target_opset + sub = OnnxSub( + feature_name, attrs['offset'].astype(dtype), + op_version=opv) + div = OnnxDiv(sub, inv_scale.astype(dtype), + op_version=opv, + output_names=[operator.outputs[0].full_name]) + div.add_to(scope, container) + return + + if inv_scale is not None: + options = container.get_options(op, dict(div='std')) + div = options['div'] + if div == 'div': + opv = container.target_opset + sub = OnnxSub( + feature_name, attrs['offset'].astype(dtype), + op_version=opv) + div = OnnxDiv(sub, inv_scale.astype(dtype), + op_version=opv, + output_names=[operator.outputs[0].full_name]) + div.add_to(scope, container) + return + if div == 'div_cast': + opv = container.target_opset + cast = OnnxCast(feature_name, to=onnx_proto.TensorProto.DOUBLE, + op_version=opv) + sub = OnnxSub(cast, attrs['offset'].astype(np.float64), + op_version=opv) + div = OnnxDiv(sub, inv_scale.astype(np.float64), op_version=opv) + cast = OnnxCast(div, to=proto_dtype, op_version=opv, + output_names=[operator.outputs[0].full_name]) + cast.add_to(scope, container) + return + + container.add_node( + op_type, feature_name, operator.outputs[0].full_name, + op_domain='ai.onnx.ml', **attrs) + + +register_converter('SklearnRobustScaler', convert_sklearn_scaler, + options={'div': ['std', 'div', 'div_cast']}) +register_converter('SklearnScaler', convert_sklearn_scaler, + options={'div': ['std', 'div', 'div_cast']}) +register_converter('SklearnMinMaxScaler', convert_sklearn_scaler) +register_converter('SklearnMaxAbsScaler', convert_sklearn_scaler, + options={'div': ['std', 'div', 'div_cast']}) diff --git a/tests/test_algebra_onnx_operators_sub_estimator.py b/tests/test_algebra_onnx_operators_sub_estimator.py index 7d5e70cda..704d6e498 100644 --- a/tests/test_algebra_onnx_operators_sub_estimator.py +++ b/tests/test_algebra_onnx_operators_sub_estimator.py @@ -5,18 +5,21 @@ from distutils.version import StrictVersion import numpy as np from numpy.testing import assert_almost_equal -from sklearn.base import BaseEstimator, ClassifierMixin, clone +from sklearn.base import ( + BaseEstimator, ClassifierMixin, clone, TransformerMixin) from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler from onnxruntime import InferenceSession, __version__ as ort_version from skl2onnx.algebra.onnx_ops import ( - OnnxIdentity, OnnxCast, OnnxReduceMax, OnnxGreater -) + OnnxIdentity, OnnxCast, OnnxReduceMax, OnnxGreater, + OnnxExp) from skl2onnx import update_registered_converter from skl2onnx import to_onnx, get_model_alias from skl2onnx.proto import onnx_proto -from skl2onnx.common.data_types import FloatTensorType, Int64TensorType +from skl2onnx.common.data_types import ( + FloatTensorType, Int64TensorType, guess_numpy_type) from skl2onnx.algebra.onnx_operator import OnnxSubEstimator from test_utils import TARGET_OPSET @@ -137,6 +140,50 @@ def dummy_conv_2(scope, operator): pass +class MinMaxScalerTwo(BaseEstimator, TransformerMixin): + + def __init__(self): + pass + + def fit(self, X, y): + self.est1_ = MinMaxScaler().fit(X) + x2 = np.exp(self.est1_.transform(X)) + self.est2_ = MaxAbsScaler().fit(x2) + return self + + def transform(self, X): + x2 = np.exp(self.est1_.transform(X)) + return self.est2_.transform(x2) + + +def subsub_mmtwo_parser(scope, model, inputs, custom_parsers=None): + alias = get_model_alias(type(model)) + this_operator = scope.declare_local_operator(alias, model) + this_operator.inputs.append(inputs[0]) + cls_type = inputs[0].type.__class__ + val = scope.declare_local_variable('variable', cls_type()) + this_operator.outputs.append(val) + return this_operator.outputs + + +def subsub_mmtwo_shape_calculator(operator): + operator.outputs[0].type.shape = operator.inputs[0].type.shape + + +def subsub_mmtwo_converter(scope, operator, container): + op = operator.raw_operator + opv = container.target_opset + out = operator.outputs + X = operator.inputs[0] + x2 = OnnxSubEstimator(op.est1_, X, op_version=opv) + x2.set_onnx_name_prefix('AAA') + x2_exp = OnnxExp(x2, op_version=opv) + x3 = OnnxSubEstimator(op.est2_, x2_exp, op_version=opv) + x3.set_onnx_name_prefix('BBB') + final = OnnxIdentity(x3, op_version=opv, output_names=out[:1]) + final.add_to(scope, container) + + class TestOnnxOperatorSubEstimator(unittest.TestCase): @unittest.skipIf( @@ -232,6 +279,31 @@ def test_sub_estimator(self): assert_almost_equal(model.predict_proba(X32), res[1], decimal=4) assert_almost_equal(model.validate(X32), res[2]) + @unittest.skipIf( + StrictVersion(ort_version) < StrictVersion("1.0"), + reason="Cast not available.") + def test_sub_sub_estimator(self): + data = load_iris() + X, y = data.data, data.target + X_train, X_test, y_train, y_test = train_test_split(X, y) + + model = MinMaxScalerTwo() + model.fit(X_train, y_train) + + update_registered_converter( + MinMaxScalerTwo, "SubSubDummy", + subsub_mmtwo_shape_calculator, + subsub_mmtwo_converter, + parser=subsub_mmtwo_parser) + + X32 = X_test[:5].astype(np.float32) + model_onnx = to_onnx( + model, X32, target_opset=TARGET_OPSET) + sess = InferenceSession(model_onnx.SerializeToString()) + res = sess.run(None, {'X': X32}) + assert_almost_equal(model.transform(X32), res[0]) + if __name__ == "__main__": + TestOnnxOperatorSubEstimator().test_sub_sub_estimator() unittest.main() From 1303a30f2072efcd59313c2d84454a7c55b9a1a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 27 Jul 2021 18:58:24 +0200 Subject: [PATCH 2/6] Fix variable names issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/algebra/graph_state.py | 1032 +++++------ skl2onnx/common/_container.py | 1522 +++++++++-------- ...st_algebra_onnx_operators_sub_estimator.py | 2 +- 3 files changed, 1286 insertions(+), 1270 deletions(-) diff --git a/skl2onnx/algebra/graph_state.py b/skl2onnx/algebra/graph_state.py index 9cfa74032..690adbff8 100644 --- a/skl2onnx/algebra/graph_state.py +++ b/skl2onnx/algebra/graph_state.py @@ -1,512 +1,520 @@ -# SPDX-License-Identifier: Apache-2.0 - -import numpy as np -from scipy.sparse import coo_matrix -from ..proto import onnx_proto, TensorProto -from ..common.data_types import ( - guess_proto_type, _guess_numpy_type, _guess_type_proto_str, - _guess_type_proto, FloatType, DoubleType, Int64Type, copy_type) -from ..common._topology import Variable -from ..common._registration import get_shape_calculator, get_converter - - -class GraphStateVar: - pass - - -class GraphState: - - def __init__(self, inputs, output_names, operator_name, scope, - container, converter, onnx_prefix_name=None, - options=None, expected_inputs=None, - expected_outputs=None, input_range=None, - output_range=None, operator=None, - run_converters=False, **attrs): - self.inputs = inputs - self._output_names = output_names - self._input_range = input_range.copy() if input_range else [1, 1e9] - self._output_range = output_range.copy() if output_range else [1, 1e9] - self.scope = scope - self.run_converters = run_converters - self.operator = operator - if hasattr(operator_name, 'fit'): - from .. import get_model_alias - self.operator_instance = operator_name - self.is_model = True - self.operator_name = get_model_alias(type(operator_name)) - else: - self.operator_name = operator_name - self.is_model = False - self.container = container - self.converter = converter - self._expected_inputs = ( - None if expected_inputs is None else expected_inputs.copy()) - self._expected_outputs = ( - None if expected_outputs is None else expected_outputs.copy()) - self.computed_inputs_ = None - self.computed_outputs_ = None - self.sub_op_ = None - self.onnx_prefix_name = onnx_prefix_name - self.attrs = attrs - self.options = options - - for att in ['inputs', '_expected_inputs', - '_expected_outputs', 'computed_inputs_', - 'computed_outputs_', '_outputs']: - v = getattr(self, att, None) - if v is None: - continue - if not isinstance(v, list): - raise TypeError( - "Attribute %r must be a list not %r." - "" % (att, type(v))) - for i, vi in enumerate(v): - if hasattr(vi, 'state') or hasattr(vi, 'onx_op'): - continue - if not isinstance(vi, (tuple, str, Variable, GraphStateVar)): - raise TypeError( - "Unexpected type %r for element %d of attribute %r " - "in %r." % (type(vi), i, att, v)) - if isinstance(vi, tuple) and len(vi) != 2: - raise ValueError( - "Unexpected value %r for element %d of attribute %r." - "" % (vi, i, att)) - change = [] - for vi in v: - change.append((vi, None) if isinstance(vi, str) else vi) - - if self._output_names is not None: - res = [] - if self._expected_outputs is not None: - for i in range(0, len(self._expected_outputs)): - if i < len(self._output_names): - res.append( - (self._output_names[i], - self._expected_outputs[i][1])) - else: - res.append(self._expected_outputs[i]) - for i in range(len(res), len(self._output_names)): - res.append((self._output_names[i], None)) - self._expected_outputs = res - - if self._expected_outputs is not None: - res = [] - for p in self._expected_outputs: - if isinstance(p[1], str) and p[1].startswith('tensor('): - res.append((p[0], _guess_type_proto_str(p[1], None))) - else: - res.append(p) - self._expected_outputs = res - - if self._expected_inputs is not None: - res = [] - for p in self._expected_inputs: - if isinstance(p[1], str) and p[1].startswith('tensor('): - res.append((p[0], _guess_type_proto_str(p[1], None))) - else: - res.append(p) - self._expected_inputs = res - - @property - def onnx_prefix(self): - if self.onnx_prefix_name is None: - return self.operator_name - return self.onnx_prefix_name + "_" + self.operator_name - - @property - def outputs(self): - self.run() - return self.computed_outputs_ - - def _get_var_name(self, var, in_out, operator=None, index=None): - "input: True for output, False for input" - if hasattr(var, 'add_to'): - var.add_to(self.scope, self.container, operator=operator, - run_converters=self.run_converters) - outputs = var.outputs - if isinstance(outputs, list): - vars = [] - for var in outputs: - if isinstance(var, (Variable, tuple)): - vars.append(var) - elif isinstance(var, str): - vars.append((var, None)) - if len(vars) == 0: - raise RuntimeError( - "Empty inputs outputs=%s var=%s in_out=%s " - "operator=%r." % (outputs, var, in_out, operator)) - return vars - raise RuntimeError("Unexpected output type {}".format(outputs)) - - def __fct__(var, operator): - if isinstance(var, Variable): - return [var] - if isinstance(var, (np.ndarray, np.bool_, np.int64, - np.float32, np.float64, - np.int8, np.uint8)): - return [self._add_constant(var)] - if hasattr(var, 'ConstantValue'): - return [ - self._add_constant(var.ConstantValue, scope=self.scope)] - if isinstance(var, str): - return [(var, None)] - if isinstance(var, tuple) and len(var) == 2: - return [var] - try: - a, b = var - return [(a, b)] - except ValueError: - pass - raise RuntimeError("Unexpected type for parameter 'var': {0}." - "".format(type(var))) - - try: - v = __fct__(var, operator) - except TypeError as e: - raise RuntimeError( - "Unable to process one variable %s and operator=%s " - "(name=%r)." % (var, operator, self.operator_name)) from e - if v is None or not isinstance(v, list) or len(v) == 0: - raise TypeError( - "Unexpected type or empty value %r - %s." % (type(v), v)) - if in_out and self._output_names is not None and index is not None: - if len(v) != 1: - raise RuntimeError( - "Mismatch number of outputs between %s and %s." % ( - v, self._output_names[index])) - v2 = self.scope.get(var[0], None) - if v2 is not None: - v = [v2] - try: - vn = v[0][0] - except IndexError as e: - raise ValueError( - "Unexpected output %s in operator name %r." - "" % (vn, self.operator_name)) from e - if (index >= len(self._output_names) and - index >= self._output_range[0]): - return None - try: - vin = self._output_names[index] - except IndexError as e: - raise ValueError( - "Unexpected index %s in operator name %r with ." - "output names %s." % ( - index, self.operator_name, - self._output_names)) from e - if vn != vin: - raise RuntimeError( - "Mismatched output name %r between %s and %s." % ( - vn, v, vin)) - return v - - def _add_constant(self, cst, scope): - - def _ty_astype(cst): - astype = cst.dtype - try: - ty = guess_proto_type(_guess_numpy_type(cst.dtype, cst.shape)) - except NotImplementedError as e: - st = str(astype).lower() - if st.startswith('u') or st.startswith("= len(new_inputs) and - j >= input_range[0]): - continue - if new_inputs[j].type is not None: - new_inputs[i].type = ( - new_inputs[j].type.__class__()) - break - - return new_inputs - - @staticmethod - def _update_contraints(vars1, expected1, vars2, expected2, debug=None): - memo = {} - for va, ex in [(vars1, expected1), (vars2, expected2)]: - if va is None or ex is None: - continue - for v, ct in zip(va, ex): - if (isinstance(v, str) or ( - hasattr(v, 'type') and v.type is None)): - continue - vt = (copy_type(v.type) - if hasattr(v, 'type') else copy_type(v[1])) - if isinstance(vt, str): - continue - key = ct[1] - if isinstance(key, str) and key[0] in ('T', 'I', 'V'): - if not isinstance(vt, str) and key not in memo: - memo[key] = [] - memo[key].append(vt) - - for k, v in memo.items(): - if len(set(_.__class__ for _ in v)) != 1: - raise RuntimeError( - "Conflicted constraint %r, got types %r operator=%s" - "." % (k, v, debug)) - for i in range(0, len(vars1)): - inp = vars1[i] - if isinstance(inp, str): - continue - if hasattr(inp, 'type') and inp.type is None: - ct = expected1[i][1] - if ct in memo: - vars1[i].type = copy_type(memo[ct][0]) - elif isinstance(inp, tuple): - ct = expected1[i][1] - if ct in memo: - vars1[i] = (inp[0], copy_type(memo[ct][0])) - - def run(self): - if self.computed_outputs_ is None: - if self.operator is not None: - expected_outputs = self.operator.outputs - else: - if self._expected_outputs is not None: - eoli = [] - for i, o in enumerate(self._expected_outputs): - v = self._get_var_name(o, True, index=i) - if v is None: - continue - eoli.extend(v) - expected_outputs = eoli - else: - expected_outputs = None - - inputs = [] - for i in self.inputs: - v = self._get_var_name(i, False, index=None) - inputs.extend(v) - - self.computed_inputs_ = GraphState._update_inputs( - self.inputs, inputs, scope=self.scope, - expected_inputs=self._expected_inputs, - input_range=self._input_range) - - name = self.scope.get_unique_operator_name(self.onnx_prefix) - if self.is_model: - if self.sub_op_ is not None: - raise NotImplementedError( - "Attribute 'sub_op_' is not empty.") - - # a model is converted into a subgraph - sub_op_inputs = self.computed_inputs_ - - # output are not defined, we need to call a parser. - from .._parse import _parse_sklearn - self.scope.add_options( - id(self.operator_instance), self.options) - sub_outputs = _parse_sklearn( - self.scope, self.operator_instance, sub_op_inputs) - set_input_names = set(v.onnx_name for v in sub_op_inputs) - sub_op = None - for op in self.scope.operators.values(): - for inp in op.inputs: - if inp.onnx_name in set_input_names: - sub_op = op - if (sub_outputs is None or - None in sub_outputs): - raise RuntimeError( - "Wrong result when parsing model {}.".format( - type(self.operator_instance))) - - # Checks operator outputs - for out in sub_outputs: - if not isinstance(out, Variable): - raise TypeError( - "Output %s must be of type Variable." % out) - self.sub_op_ = sub_op - sub_op.outputs = sub_outputs - - shape_calc = get_shape_calculator(self.operator_name) - shape_calc(sub_op) - - # Add Identity nodes to be consistent with `is_fed` - # in Topology. - if expected_outputs is not None: - outputs = [ - self._get_output_name( - self._output_names, o, self.scope) - for o in expected_outputs] - else: - outputs = [ - self.scope.declare_local_variable( - o.onnx_name, type=o.type) - for o in sub_op.outputs] - if len(outputs) != len(sub_op.outputs): - raise RuntimeError( - "Mismatched number of outputs %s and %s." % ( - outputs, sub_op.outputs)) - - for i, out in enumerate(sub_op.outputs): - var = outputs[i] - self.container.add_node( - 'Identity', [out.onnx_name], [var[0]], - name=self.scope.get_unique_operator_name("SubOpId")) - self.computed_outputs_ = outputs - self.computed_inputs2_ = sub_op.inputs - self.computed_outputs2_ = [ - (v.raw_name, v.type) for v in self.computed_outputs_] - - if self.run_converters: - # The parser was run on sub-operators but not the - # converter. - conv = get_converter(self.operator_name) - conv(self.scope, sub_op, self.container) - else: - # only one node is added - if self.options is not None: - raise RuntimeError( - "Options must be empty for node %r but is it %r." % ( - self.operator_name, self.options)) - outputs = [ - self._get_output_name(self._output_names, o, self.scope) - for o in expected_outputs] - input_names = [i[0] for i in inputs] - output_names = [i[0] for i in outputs] - self.container.add_node( - self.operator_name, input_names, output_names, - name=name, **self.attrs) - self.computed_outputs_ = [ - (name, ct[1]) for name, ct in zip( - output_names, self._expected_outputs)] - self._update_contraints( - self.computed_outputs_, self._expected_outputs, - self.computed_inputs_, self._expected_inputs, - debug=self.operator_name) +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +from scipy.sparse import coo_matrix +from ..proto import onnx_proto, TensorProto +from ..common.data_types import ( + guess_proto_type, _guess_numpy_type, _guess_type_proto_str, + _guess_type_proto, FloatType, DoubleType, Int64Type, copy_type) +from ..common._topology import Variable +from ..common._registration import get_shape_calculator, get_converter + + +class GraphStateVar: + pass + + +class GraphState: + + def __init__(self, inputs, output_names, operator_name, scope, + container, converter, onnx_prefix_name=None, + options=None, expected_inputs=None, + expected_outputs=None, input_range=None, + output_range=None, operator=None, + run_converters=False, **attrs): + self.inputs = inputs + self._output_names = output_names + self._input_range = input_range.copy() if input_range else [1, 1e9] + self._output_range = output_range.copy() if output_range else [1, 1e9] + self.scope = scope + self.run_converters = run_converters + self.operator = operator + if hasattr(operator_name, 'fit'): + from .. import get_model_alias + self.operator_instance = operator_name + self.is_model = True + self.operator_name = get_model_alias(type(operator_name)) + else: + self.operator_name = operator_name + self.is_model = False + self.container = container + self.converter = converter + self._expected_inputs = ( + None if expected_inputs is None else expected_inputs.copy()) + self._expected_outputs = ( + None if expected_outputs is None else expected_outputs.copy()) + self.computed_inputs_ = None + self.computed_outputs_ = None + self.sub_op_ = None + self.onnx_prefix_name = onnx_prefix_name + self.attrs = attrs + self.options = options + + for att in ['inputs', '_expected_inputs', + '_expected_outputs', 'computed_inputs_', + 'computed_outputs_', '_outputs']: + v = getattr(self, att, None) + if v is None: + continue + if not isinstance(v, list): + raise TypeError( + "Attribute %r must be a list not %r." + "" % (att, type(v))) + for i, vi in enumerate(v): + if hasattr(vi, 'state') or hasattr(vi, 'onx_op'): + continue + if not isinstance(vi, (tuple, str, Variable, GraphStateVar)): + raise TypeError( + "Unexpected type %r for element %d of attribute %r " + "in %r." % (type(vi), i, att, v)) + if isinstance(vi, tuple) and len(vi) != 2: + raise ValueError( + "Unexpected value %r for element %d of attribute %r." + "" % (vi, i, att)) + change = [] + for vi in v: + change.append((vi, None) if isinstance(vi, str) else vi) + + if self._output_names is not None: + res = [] + if self._expected_outputs is not None: + for i in range(0, len(self._expected_outputs)): + if i < len(self._output_names): + res.append( + (self._output_names[i], + self._expected_outputs[i][1])) + else: + res.append(self._expected_outputs[i]) + for i in range(len(res), len(self._output_names)): + res.append((self._output_names[i], None)) + self._expected_outputs = res + + if self._expected_outputs is not None: + res = [] + for p in self._expected_outputs: + if isinstance(p[1], str) and p[1].startswith('tensor('): + res.append((p[0], _guess_type_proto_str(p[1], None))) + else: + res.append(p) + self._expected_outputs = res + + if self._expected_inputs is not None: + res = [] + for p in self._expected_inputs: + if isinstance(p[1], str) and p[1].startswith('tensor('): + res.append((p[0], _guess_type_proto_str(p[1], None))) + else: + res.append(p) + self._expected_inputs = res + + @property + def onnx_prefix(self): + if self.onnx_prefix_name is None: + return self.operator_name + return self.onnx_prefix_name + "_" + self.operator_name + + @property + def outputs(self): + self.run() + return self.computed_outputs_ + + def _get_var_name(self, var, in_out, operator=None, index=None): + "input: True for output, False for input" + if hasattr(var, 'add_to'): + var.add_to(self.scope, self.container, operator=operator, + run_converters=self.run_converters) + outputs = var.outputs + if isinstance(outputs, list): + vars = [] + for var in outputs: + if isinstance(var, (Variable, tuple)): + vars.append(var) + elif isinstance(var, str): + vars.append((var, None)) + if len(vars) == 0: + raise RuntimeError( + "Empty inputs outputs=%s var=%s in_out=%s " + "operator=%r." % (outputs, var, in_out, operator)) + return vars + raise RuntimeError("Unexpected output type {}".format(outputs)) + + def __fct__(var, operator): + if isinstance(var, Variable): + return [var] + if isinstance(var, (np.ndarray, np.bool_, np.int64, + np.float32, np.float64, + np.int8, np.uint8)): + return [self._add_constant(var)] + if hasattr(var, 'ConstantValue'): + return [ + self._add_constant(var.ConstantValue, scope=self.scope)] + if isinstance(var, str): + return [(var, None)] + if isinstance(var, tuple) and len(var) == 2: + return [var] + try: + a, b = var + return [(a, b)] + except ValueError: + pass + raise RuntimeError("Unexpected type for parameter 'var': {0}." + "".format(type(var))) + + try: + v = __fct__(var, operator) + except TypeError as e: + raise RuntimeError( + "Unable to process one variable %s and operator=%s " + "(name=%r)." % (var, operator, self.operator_name)) from e + if v is None or not isinstance(v, list) or len(v) == 0: + raise TypeError( + "Unexpected type or empty value %r - %s." % (type(v), v)) + if in_out and self._output_names is not None and index is not None: + if len(v) != 1: + raise RuntimeError( + "Mismatch number of outputs between %s and %s." % ( + v, self._output_names[index])) + v2 = self.scope.get(var[0], None) + if v2 is not None: + v = [v2] + try: + vn = v[0][0] + except IndexError as e: + raise ValueError( + "Unexpected output %s in operator name %r." + "" % (vn, self.operator_name)) from e + if (index >= len(self._output_names) and + index >= self._output_range[0]): + return None + try: + vin = self._output_names[index] + except IndexError as e: + raise ValueError( + "Unexpected index %s in operator name %r with ." + "output names %s." % ( + index, self.operator_name, + self._output_names)) from e + if vn != vin: + raise RuntimeError( + "Mismatched output name %r between %s and %s." % ( + vn, v, vin)) + return v + + def _add_constant(self, cst, scope): + + def _ty_astype(cst): + astype = cst.dtype + try: + ty = guess_proto_type(_guess_numpy_type(cst.dtype, cst.shape)) + except NotImplementedError as e: + st = str(astype).lower() + if st.startswith('u') or st.startswith("= len(new_inputs) and + j >= input_range[0]): + continue + if new_inputs[j].type is not None: + new_inputs[i].type = ( + new_inputs[j].type.__class__()) + break + + return new_inputs + + @staticmethod + def _update_contraints(vars1, expected1, vars2, expected2, debug=None): + memo = {} + for va, ex in [(vars1, expected1), (vars2, expected2)]: + if va is None or ex is None: + continue + for v, ct in zip(va, ex): + if (isinstance(v, str) or ( + hasattr(v, 'type') and v.type is None)): + continue + vt = (copy_type(v.type) + if hasattr(v, 'type') else copy_type(v[1])) + if isinstance(vt, str): + continue + key = ct[1] + if isinstance(key, str) and key[0] in ('T', 'I', 'V'): + if not isinstance(vt, str) and key not in memo: + memo[key] = [] + memo[key].append(vt) + + for k, v in memo.items(): + if len(set(_.__class__ for _ in v)) != 1: + raise RuntimeError( + "Conflicted constraint %r, got types %r operator=%s" + "." % (k, v, debug)) + for i in range(0, len(vars1)): + inp = vars1[i] + if isinstance(inp, str): + continue + if hasattr(inp, 'type') and inp.type is None: + ct = expected1[i][1] + if ct in memo: + vars1[i].type = copy_type(memo[ct][0]) + elif isinstance(inp, tuple): + ct = expected1[i][1] + if ct in memo: + vars1[i] = (inp[0], copy_type(memo[ct][0])) + + def run(self): + if self.computed_outputs_ is None: + if self.operator is not None: + expected_outputs = self.operator.outputs + else: + if self._expected_outputs is not None: + eoli = [] + for i, o in enumerate(self._expected_outputs): + v = self._get_var_name(o, True, index=i) + if v is None: + continue + eoli.extend(v) + expected_outputs = eoli + else: + expected_outputs = None + + inputs = [] + for i in self.inputs: + v = self._get_var_name(i, False, index=None) + inputs.extend(v) + + self.computed_inputs_ = GraphState._update_inputs( + self.inputs, inputs, scope=self.scope, + expected_inputs=self._expected_inputs, + input_range=self._input_range) + + name = self.scope.get_unique_operator_name(self.onnx_prefix) + if self.is_model: + if self.sub_op_ is not None: + raise NotImplementedError( + "Attribute 'sub_op_' is not empty.") + + # a model is converted into a subgraph + sub_op_inputs = self.computed_inputs_ + + # output are not defined, we need to call a parser. + from .._parse import _parse_sklearn + self.scope.add_options( + id(self.operator_instance), self.options) + sub_outputs = _parse_sklearn( + self.scope, self.operator_instance, sub_op_inputs) + set_input_names = set(v.onnx_name for v in sub_op_inputs) + sub_op = None + for op in self.scope.operators.values(): + for inp in op.inputs: + if inp.onnx_name in set_input_names: + sub_op = op + if (sub_outputs is None or + None in sub_outputs): + raise RuntimeError( + "Wrong result when parsing model {}.".format( + type(self.operator_instance))) + + # Checks operator outputs + for out in sub_outputs: + if not isinstance(out, Variable): + raise TypeError( + "Output %s must be of type Variable." % out) + self.sub_op_ = sub_op + sub_op.outputs = sub_outputs + + shape_calc = get_shape_calculator(self.operator_name) + shape_calc(sub_op) + + # Add Identity nodes to be consistent with `is_fed` + # in Topology. + if expected_outputs is not None: + outputs = [ + self._get_output_name( + self._output_names, o, self.scope) + for o in expected_outputs] + else: + outputs = [ + self.scope.declare_local_variable( + o.onnx_name, type=o.type) + for o in sub_op.outputs] + if len(outputs) != len(sub_op.outputs): + raise RuntimeError( + "Mismatched number of outputs %s and %s." % ( + outputs, sub_op.outputs)) + + for i, out in enumerate(sub_op.outputs): + var = outputs[i] + self.container.add_node( + 'Identity', [out.onnx_name], [var[0]], + name=self.scope.get_unique_operator_name("SubOpId")) + self.computed_outputs_ = outputs + self.computed_inputs2_ = sub_op.inputs + self.computed_outputs2_ = [ + (v.raw_name, v.type) for v in self.computed_outputs_] + + if self.run_converters: + # The parser was run on sub-operators but not the + # converter. + conv = get_converter(self.operator_name) + conv(self.scope, sub_op, self.container) + else: + # only one node is added + if self.options is not None: + raise RuntimeError( + "Options must be empty for node %r but is it %r." % ( + self.operator_name, self.options)) + outputs = [ + self._get_output_name(self._output_names, o, self.scope) + for o in expected_outputs] + input_names = [i[0] for i in inputs] + output_names = [i[0] for i in outputs] + self.container.add_node( + self.operator_name, input_names, output_names, + name=name, **self.attrs) + computed_outputs = [ + (name, ct[1]) for name, ct in zip( + output_names, self._expected_outputs)] + self._update_contraints( + computed_outputs, self._expected_outputs, + self.computed_inputs_, self._expected_inputs, + debug=self.operator_name) + + # Registers the variables into scope. + self.computed_outputs_ = [] + for name, kind in computed_outputs: + var = self.scope.declare_local_variable(name, kind) + var.onnx_name = name # name already comes from scope.get_unique_variable_name + var.is_fed = True + self.computed_outputs_.append(var) diff --git a/skl2onnx/common/_container.py b/skl2onnx/common/_container.py index 3beedaf0e..1fc00b995 100644 --- a/skl2onnx/common/_container.py +++ b/skl2onnx/common/_container.py @@ -1,757 +1,765 @@ -# SPDX-License-Identifier: Apache-2.0 - - -import inspect -import re -import sys -import traceback -import warnings -import numpy as np -from scipy.sparse import coo_matrix -from onnx.defs import onnx_opset_version, get_all_schemas_with_history -import onnx.onnx_cpp2py_export.defs as C -from onnxconverter_common.onnx_ops import __dict__ as dict_apply_operation -from ..proto import TensorProto -from ..proto.onnx_helper_modified import ( - make_node, ValueInfoProto, make_tensor, make_attribute -) -try: - from ..proto import SparseTensorProto - from ..proto.onnx_helper_modified import make_sparse_tensor -except ImportError: - # onnx is too old. - SparseTensorProto = None - make_sparse_tensor = None -from .interface import ModelContainer -from .utils import get_domain - - -def _get_operation_list(): - """ - Investigates this module to extract all ONNX functions - which needs to be converted with these functions. - """ - regs = [re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " - "\\[?input_name"), - re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " - "\\[\\]"), - re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " - "inputs"), - re.compile("scope, '([A-Z][a-zA-Z0-9]*)', \\[?input_name"), - re.compile("op_type = '([A-Z][a-zA-Z0-9]*)'")] - res = {} - for k, v in dict_apply_operation.items(): - if k.startswith("apply_") and callable(v): - found = None - source = inspect.getsource(v) - for reg in regs: - g = reg.search(source) - if g: - found = g.groups()[0] - break - if found is None: - continue - res[found] = v - return res - - -def _build_options(model, defined_options, default_values, - allowed_options, fail): - opts = {} if default_values is None else default_values - if defined_options is not None: - opts.update(defined_options.get(type(model), {})) - opts.update(defined_options.get(id(model), {})) - if allowed_options not in (None, 'passthrough'): - for k, v in opts.items(): - if k not in allowed_options: - if fail: - raise NameError( - "Option '{}' not in {} for class '{}'.".format( - k, list(sorted(allowed_options)), - model.__class__.__name__)) - return None - allowed = allowed_options[k] - if allowed is not None and v not in allowed and v is not None: - raise ValueError( - "Unexpected value [{!r}] for option '{}'" - " (it must be in {}) for model '{}'.".format( - v, k, allowed, model.__class__.__name__)) - elif fail and len(opts) != 0 and allowed_options != 'passthrough': - raise RuntimeError( - "Options {} are not registerd for model '{}'.".format( - list(sorted(opts)), model.__class__.__name__)) - return opts - - -_apply_operation_specific = _get_operation_list() - - -class _WhiteBlackContainer: - - def __init__(self, white_op=None, black_op=None): - self._white_op = white_op - self._black_op = black_op - - def is_allowed(self, node_type): - """ - Tells if a node is white listed or not black listed. - """ - if isinstance(node_type, (list, tuple, set)): - return all(map(self.is_allowed, node_type)) - try: - self.check_white_black_list(node_type) - return True - except RuntimeError: - return False - - def check_white_black_list(self, node_type): - """ - Checks a node type is allowed according to white - and black lists. - """ - if self._white_op: - if node_type not in self._white_op: - raise RuntimeError( - "Operator '{}' is not white listed.".format(node_type)) - if self._black_op: - if node_type in self._black_op: - raise RuntimeError( - "Operator '{}' is black listed.".format(node_type)) - - -class RawModelContainerNode(_WhiteBlackContainer): - """ - This node is the carrier of the model we want to convert. - It provides an abstract layer so that our parsing - framework can work with models generated by different tools. - """ - - def __init__(self, raw_model, white_op=None, black_op=None): - """ - :param raw_model: *scikit-learn* model to convert - """ - _WhiteBlackContainer.__init__( - self, white_op=white_op, black_op=black_op) - self._raw_model = raw_model - - @property - def raw_model(self): - return self._raw_model - - @property - def input_names(self): - """ - This function should return a list of strings. Each string - corresponds to an input variable name. - :return: a list of string - """ - raise NotImplementedError() - - @property - def output_names(self): - """ - This function should return a list of strings. Each string - corresponds to an output variable name. - :return: a list of string - """ - raise NotImplementedError() - - -class SklearnModelContainerNode(RawModelContainerNode): - """ - Main container for one *scikit-learn* model. - Every converter adds nodes to an existing container - which is converted into a *ONNX* graph by an instance of - :class:`Topology `. - """ - - def __init__(self, sklearn_model, white_op=None, black_op=None): - super(SklearnModelContainerNode, self).__init__( - sklearn_model, white_op=white_op, black_op=black_op) - # Scikit-learn models have no input and output specified, - # so we create them and store them in this container. - self._inputs = [] - self._outputs = [] - - @property - def input_names(self): - return [variable.onnx_name for variable in self._inputs] - - @property - def output_names(self): - return [variable.onnx_name for variable in self._outputs] - - def add_input(self, variable): - # The order of adding variables matters. The final model's - # input names are sequentially added as this list - if variable not in self._inputs: - self._inputs.append(variable) - - def add_output(self, variable): - # The order of adding variables matters. The final model's - # output names are sequentially added as this list - if variable not in self._outputs: - self._outputs.append(variable) - - -class ModelComponentContainer(ModelContainer, _WhiteBlackContainer): - """ - In the conversion phase, this class is used to collect all materials - required to build an *ONNX* *GraphProto*, which is encapsulated in a - *ONNX* *ModelProto*. - """ - - def __init__(self, target_opset, options=None, registered_models=None, - white_op=None, black_op=None): - """ - :param target_opset: number, for example, 7 for *ONNX 1.2*, and - 8 for *ONNX 1.3*. - :param options: see :ref:`l-conv-options` - :param registered_models: registered models - :param white_op: white list of ONNX nodes allowed - while converting a pipeline, if empty, all are allowed - :param black_op: black list of ONNX nodes allowed - while converting a pipeline, if empty, none are blacklisted - """ - _WhiteBlackContainer.__init__( - self, white_op=white_op, black_op=black_op) - # Inputs of ONNX graph. They are ValueInfoProto in ONNX. - self.inputs = [] - # Outputs of ONNX graph. They are ValueInfoProto in ONNX. - self.outputs = [] - # ONNX tensors (type: TensorProto). They are initializers of - # ONNX GraphProto. - self.initializers = [] - self.initializers_strings = {} - # Intermediate variables in ONNX computational graph. They are - # ValueInfoProto in ONNX. - self.value_info = [] - # ONNX nodes (type: NodeProto) used to define computation - # structure - self.nodes = [] - # ONNX operators' domain-version pair set. They will be added - # into opset_import field in the final ONNX model. - self.node_domain_version_pair_sets = set() - # The targeted ONNX operator set (referred to as opset) that - # matches the ONNX version. - if isinstance(target_opset, dict): - self.target_opset_all = target_opset - self.target_opset = target_opset.get('', None) - else: - self.target_opset = target_opset - self.target_opset_all = {'': target_opset} - # Additional options given to converters. - self.options = options - # All registered models. - self.registered_models = registered_models - - def __str__(self): - """ - Shows internal information. - """ - rows = [] - if self.inputs: - rows.append("INPUTS") - for inp in self.inputs: - rows.append( - " " + str(inp).replace(" ", "").replace("\n", " ")) - if self.outputs: - rows.append("OUTPUTS") - for out in self.outputs: - rows.append( - " " + str(out).replace(" ", "").replace("\n", " ")) - if self.initializers: - rows.append("INITIALIZERS") - for ini in self.initializers: - rows.append( - " " + str(ini).replace(" ", "").replace("\n", " ")) - if self.value_info: - rows.append("NODES") - for val in self.value_info: - rows.append( - " " + str(val).replace(" ", "").replace("\n", " ")) - if self.nodes: - rows.append("PROTO") - for nod in self.nodes: - rows.append( - " " + str(nod).replace(" ", "").replace("\n", " ")) - return "\n".join(rows) - - def _make_value_info(self, variable): - value_info = ValueInfoProto() - value_info.name = variable.full_name - value_info.type.CopyFrom(variable.type.to_onnx_type()) - if variable.type.doc_string: - value_info.doc_string = variable.type.doc_string - return value_info - - def add_input(self, variable): - """ - Adds our *Variable* object defined _parser.py into the the input - list of the final ONNX model. - - :param variable: The Variable object to be added - """ - self.inputs.append(self._make_value_info(variable)) - - def add_output(self, variable): - """ - Adds our *Variable* object defined *_parser.py* into the the - output list of the final ONNX model. - - :param variable: The Variable object to be added - """ - self.outputs.append(self._make_value_info(variable)) - - def add_options(self, model_id, options): - """ - Adds an option, for example, - ``add_options(id(clr), {'raw_scores': True})`` - tells the converter associated to ``clr`` to - use raw score instead of probabilities. - - :param model_id: class or ``id(instance)`` - :param options: dictionary with the new values - """ - if options is None: - return - if self.options is None: - self.options = {} - if model_id not in self.options: - self.options[model_id] = None - if self.options[model_id] is None: - self.options[model_id] = {} - self.options[model_id].update(options) - - def add_initializer(self, name, onnx_type, shape, content): - """ - Adds a *TensorProto* into the initializer list of the final - ONNX model. - - :param name: Variable name in the produced ONNX model. - :param onnx_type: Element types allowed in ONNX tensor, e.g., - TensorProto.FLOAT and TensorProto.STRING. - :param shape: Tensor shape, a list of integers. - :param content: Flattened tensor values (i.e., a float list - or a float array). - :return: created tensor - """ - sparse_tensor = None - tensor = None - - cached_value = None - if isinstance(content, TensorProto): - tensor = TensorProto() - tensor.data_type = content.data_type - tensor.name = name - tensor.raw_data = content.raw_data - tensor.dims.extend(content.dims) - elif shape is None and isinstance( - content, (np.float32, np.float64, np.int32, - np.int64, float, np.int8, np.uint8, - np.bool_)): - tensor = make_tensor(name, onnx_type, [], [content]) - elif (SparseTensorProto is not None and - isinstance(content, SparseTensorProto)): - raise NotImplementedError("Not implemented yet.") - elif shape is None: - tensor = make_attribute(name, content) - elif isinstance(content, coo_matrix): - if SparseTensorProto is None: - raise RuntimeError( - "Sparse matrices require SparseTensorProto. Update onnx.") - values_tensor = make_tensor( - name + "_v", data_type=onnx_type, - dims=(len(content.data), ), vals=content.data) - indices = [i * content.shape[1] + j - for i, j in zip(content.row, content.col)] - indices_tensor = make_tensor( - name=name + "_i", data_type=TensorProto.INT64, - dims=(len(indices), ), vals=indices) - dense_shape = list(content.shape) - sparse_tensor = make_sparse_tensor( - values_tensor, indices_tensor, dense_shape) - - # cached value: same without names - values_tensor = make_tensor( - "_v", data_type=onnx_type, - dims=(len(content.data), ), vals=content.data) - indices_tensor = make_tensor( - name="_i", data_type=TensorProto.INT64, - dims=(len(indices), ), vals=indices) - cached_value = make_sparse_tensor( - values_tensor, indices_tensor, dense_shape) - - else: - if any(d is None for d in shape): - raise ValueError('Shape of initializer cannot contain None.') - if (hasattr(content, 'dtype') and - content.dtype in (bool, np.bool_)): - content = content.astype(np.int32) - try: - tensor = make_tensor(name, onnx_type, shape, content) - except TypeError as e: - raise TypeError( - "Unable to make a tensor name=%r " - "onnx_type=%r shape=%r content-type=%r." % ( - name, onnx_type, shape, type(content))) from e - - if tensor is not None: - if cached_value is None: - name = tensor.name - tensor.name = "tensor" - content = tensor.SerializeToString() - tensor.name = name - else: - content = cached_value.SerializeToString() - cached_name = self.initializers_strings.get(content, None) - if cached_name is None: - self.initializers_strings[content] = name - self.initializers.append(tensor) - return tensor - - self.add_node( - 'Identity', cached_name, name, op_version=self.target_opset, - name=name + '_op') - return name - - if sparse_tensor is not None: - content = cached_value.SerializeToString() - cached_name = self.initializers_strings.get(content, None) - if cached_name is None: - self.initializers_strings[content] = name - self.add_node( - 'Constant', [], [name], sparse_value=sparse_tensor, - op_version=self.target_opset, name=name + '_op') - return sparse_tensor - - self.add_node( - 'Identity', cached_name, name, op_version=self.target_opset, - name=name + '_op') - return name - - raise RuntimeError( - "Either tensor or sparse_tensor should be defined.") - - def add_value_info(self, variable): - self.value_info.append(self._make_value_info(variable)) - - def _check_operator(self, op_type): - """ - Checks that if *op_type* is one of the operators defined in - :mod:`skl2onnx.common._apply_container`, then it was called - from a function defined in this submodule by looking - into the callstack. The test is enabled for *python >= 3.6*. - """ - if (op_type in _apply_operation_specific and - sys.version_info[:2] >= (3, 6)): - tb = traceback.extract_stack() - operation = [] - fct = _apply_operation_specific[op_type] - skl2 = False - for b in tb: - if "_apply_operation" in b.filename and b.name == fct.__name__: - operation.append(b) - if not skl2 and "skl2onnx" in b.filename: - skl2 = True - if skl2 and len(operation) == 0: - raise RuntimeError( - "Operator '{0}' should be added with function " - "'{1}' in submodule _apply_operation.".format( - op_type, fct.__name__)) - self.check_white_black_list(op_type) - - def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, - name=None, **attrs): - """ - Adds a *NodeProto* into the node list of the final ONNX model. - If the input operator's domain-version information cannot be - found in our domain-version pool (a Python set), we may add it. - - :param op_type: A string (e.g., Pool and Conv) indicating the - type of the NodeProto - :param inputs: A list of strings. They are the input variables' - names of the considered NodeProto - :param outputs: A list of strings. They are the output - variables' names of the considered NodeProto - :param op_domain: The domain name (e.g., ai.onnx.ml) of the - operator we are trying to add. - :param op_version: The version number (e.g., 0 and 1) of the - operator we are trying to add. - :param name: name of the node, this name cannot be empty - :param attrs: A Python dictionary. Keys and values are - attributes' names and attributes' values, - respectively. - """ - if name is None or not isinstance( - name, str) or name == '': - name = "N%d" % len(self.nodes) - existing_names = set(n.name for n in self.nodes) - if name in existing_names: - name += "-N%d" % len(self.nodes) - - if op_domain is None: - op_domain = get_domain() - self._check_operator(op_type) - if op_version is None: - op_version = self._get_op_version(op_domain, op_type) - - if isinstance(inputs, str): - inputs = [inputs] - if isinstance(outputs, str): - outputs = [outputs] - try: - common = set(inputs) & set(outputs) - except TypeError as e: - raise TypeError( - "inputs or outputs are wrong, inputs=%r, outputs=%r, node=%r." - "" % (inputs, outputs, op_type)) from e - if common: - raise RuntimeError( - "inputs and outputs cannot have " - "variables in common {} in node '{}' " - "with name '{}'.".format(common, op_type, name)) - if not isinstance(inputs, list) or not all( - isinstance(s, str) for s in inputs): - type_list = ','.join(list(str(type(s)) for s in inputs)) - raise ValueError('Inputs must be a list of string but get [%s]' - % type_list) - if (not isinstance(outputs, list) or - not all(isinstance(s, str) for s in outputs)): - type_list = ','.join(list(str(type(s)) for s in outputs)) - raise ValueError('Outputs must be a list of string but get [%s]' - % type_list) - upd = {} - dtypes = set() - for k, v in attrs.items(): - if v is None: - raise ValueError( - 'Failed to create ONNX node. Undefined ' - 'attribute pair (%s, %s) found for type %r and ' - 'version %r' % ( - k, v, op_type, op_version)) - if isinstance(v, np.ndarray): - upd[k] = v - dtypes.add(v.dtype) - - if upd: - attrs.update(upd) - if 'dtype' in attrs: - raise RuntimeError("dtype should not be a parameter.") - if len(dtypes) == 0: - dtype = None - elif len(dtypes) == 1: - dtype = list(dtypes)[0] - elif (np.float32 in dtypes and np.float64 in dtypes): - raise RuntimeError( - "Unable to select a dtype among {}.".format(dtypes)) - else: - dtype = None - try: - node = make_node(op_type, inputs, outputs, name=name, - _dtype=dtype, **attrs) - except ValueError as e: - raise ValueError("Unable to create node '{}' with name='{}'." - "".format(op_type, name)) from e - node.domain = op_domain - - self.node_domain_version_pair_sets.add((op_domain, op_version)) - self.nodes.append(node) - if (self.target_opset is not None and - op_version is not None and - op_version > self.target_opset_any_domain(op_domain)): - raise RuntimeError( - "Opset number {} is higher than targeted opsets {} for " - "node '{}' (domain: '{}').".format( - op_version, self.target_opset_all, - node.op_type, op_domain)) - - def target_opset_any_domain(self, domain): - target_opset = self.target_opset_all - if isinstance(target_opset, dict): - if domain in target_opset: - to = target_opset[domain] - else: - to = None - if to is None and domain == '': - to = onnx_opset_version() - if to is None: - smap = C.schema_version_map() - if domain in smap: - to = smap[domain][1] - if to is not None: - return to - # The domain is not registered in onnx, it is probably - # a custom domain. We assume the version is one. - return 1 - return self.target_opset - - @property - def target_opset_onnx(self): - return self.target_opset_any_domain('') - - def _get_op_version(self, domain, op_type): - """ - Determines the highest version of operator - *op_type* below or equal to *target_opset*. - """ - if not hasattr(self, '_op_versions'): - self._build_op_version() - key = domain, op_type - vers = self._op_versions.get(key, None) - if vers is None: - warnings.warn( - "Unable to find operator '{}' in domain '{}' in ONNX, " - "op_version is forced to 1.".format( - op_type, domain)) - vers = [1] - highest = self.target_opset_any_domain(domain) - pos = len(vers) - 1 - while pos >= 0: - if vers[pos] <= highest: - return vers[pos] - pos -= 1 - raise RuntimeError( - "Unable to find a suitable version for operator '{}' " - "in domain '{}'. Available versions: {}.".format( - op_type, domain, vers)) - - def _build_op_version(self): - res = {} - for schema in get_all_schemas_with_history(): - dom = schema.domain - name = schema.name - vers = schema.since_version - if (dom, name) not in res: - res[dom, name] = set() - res[dom, name].add(vers) - self._op_versions = {} - for k, v in res.items(): - self._op_versions[k] = list(sorted(v)) - - def _get_allowed_options(self, model): - if self.registered_models is not None: - if inspect.isfunction(model): - if model not in self.registered_models['aliases']: - return None - alias = self.registered_models['aliases'][model] - elif hasattr(model, 'alias'): - alias = model.alias - else: - if type(model) not in self.registered_models['aliases']: - return {} - alias = self.registered_models['aliases'][type(model)] - conv = self.registered_models['conv'][alias] - allowed = conv.get_allowed_options() - if allowed is None: - return {} - return allowed - clname = (str(model) if inspect.isfunction(model) - else model.__class__.__name__) - raise NotImplementedError( - "No registered models, no known allowed options " - "for model '{}'.".format(clname)) - - def validate_options(self, operator): - """ - Validates every operator allows the options - given by the user at converter time - for an operator. - """ - skl_op = operator.raw_operator - self.get_options(skl_op) - - def get_options(self, model, default_values=None, fail=True): - """ - Returns additional options for a model. - It first looks by class then by id (``id(model)``). - :param model: model being converted - :param default_values: default options (it is modified by - the function) - :param fail: fails if options not found - :return: dictionary - """ - return _build_options( - model, self.options, default_values, - self._get_allowed_options(model), fail=fail) - - def has_options(self, model, option_name): - """ - Tells if a model allows one specific options. - - :param model: model being converted - :return: boolean - """ - opts = self._get_allowed_options(model) - return option_name in opts - - def ensure_topological_order(self): - """ - Ensures and modifies the order of nodes to have - a topological order (every node in the list - can only be an input for a node later in this list). - The function raises an exception if a cycle is detected. - """ - order = {} - for inp in self.inputs: - name = inp.name - order[name] = 0 - for inp in self.initializers: - name = inp.name - order[name] = 0 - n_iter = 0 - while n_iter < len(self.nodes) * 2: - n_iter += 1 - missing_names = set() - missing_ops = [] - for node in self.nodes: - maxi = 0 - for name in node.input: - if name in order: - maxi = max(maxi, order[name]) - else: - maxi = None - missing_names.add(name) - break - if maxi is None: - missing_ops.append(node) - continue - key = id(node) - if key in order: - continue - maxi += 1 - order[key] = maxi - maxi += 1 - for name in node.output: - if name in order: - raise RuntimeError( - "Unable to sort a node (cycle). An output was " - "already ordered %r (iteration=%r)." % ( - name, n_iter)) - order[name] = maxi - if len(missing_names) == 0: - continue - - if len(missing_ops) > 0: - def nstr(name): - if name in order: - return "%s(%d)" % (name, order[name]) - return name - rows = ["%s (%s) -> (%s)" % ( - n.name or n.op_type, - ', '.join(map(nstr, n.input)), - ', '.join(n.output)) - for n in missing_ops] - rows.insert(0, "") - raise RuntimeError( - "After %d iterations for %d nodes, still unable " - "to sort names %r. The graph may be disconnected. " - "List of operators: %s" % ( - n_iter, len(self.nodes), missing_names, - "\n".join(rows))) - - # Update order - topo = [(order[id(node)], str(id(node))) for node in self.nodes] - topo.sort() - map_nodes = {str(id(node)): node for node in self.nodes} - self.nodes = [map_nodes[_[1]] for _ in topo] +# SPDX-License-Identifier: Apache-2.0 + + +import inspect +import re +import sys +import traceback +import warnings +import numpy as np +from scipy.sparse import coo_matrix +from onnx.defs import onnx_opset_version, get_all_schemas_with_history +import onnx.onnx_cpp2py_export.defs as C +from onnxconverter_common.onnx_ops import __dict__ as dict_apply_operation +from ..proto import TensorProto +from ..proto.onnx_helper_modified import ( + make_node, ValueInfoProto, make_tensor, make_attribute +) +try: + from ..proto import SparseTensorProto + from ..proto.onnx_helper_modified import make_sparse_tensor +except ImportError: + # onnx is too old. + SparseTensorProto = None + make_sparse_tensor = None +from .interface import ModelContainer +from .utils import get_domain + + +def _get_operation_list(): + """ + Investigates this module to extract all ONNX functions + which needs to be converted with these functions. + """ + regs = [re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " + "\\[?input_name"), + re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " + "\\[\\]"), + re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " + "inputs"), + re.compile("scope, '([A-Z][a-zA-Z0-9]*)', \\[?input_name"), + re.compile("op_type = '([A-Z][a-zA-Z0-9]*)'")] + res = {} + for k, v in dict_apply_operation.items(): + if k.startswith("apply_") and callable(v): + found = None + source = inspect.getsource(v) + for reg in regs: + g = reg.search(source) + if g: + found = g.groups()[0] + break + if found is None: + continue + res[found] = v + return res + + +def _build_options(model, defined_options, default_values, + allowed_options, fail): + opts = {} if default_values is None else default_values + if defined_options is not None: + opts.update(defined_options.get(type(model), {})) + opts.update(defined_options.get(id(model), {})) + if allowed_options not in (None, 'passthrough'): + for k, v in opts.items(): + if k not in allowed_options: + if fail: + raise NameError( + "Option '{}' not in {} for class '{}'.".format( + k, list(sorted(allowed_options)), + model.__class__.__name__)) + return None + allowed = allowed_options[k] + if allowed is not None and v not in allowed and v is not None: + raise ValueError( + "Unexpected value [{!r}] for option '{}'" + " (it must be in {}) for model '{}'.".format( + v, k, allowed, model.__class__.__name__)) + elif fail and len(opts) != 0 and allowed_options != 'passthrough': + raise RuntimeError( + "Options {} are not registerd for model '{}'.".format( + list(sorted(opts)), model.__class__.__name__)) + return opts + + +_apply_operation_specific = _get_operation_list() + + +class _WhiteBlackContainer: + + def __init__(self, white_op=None, black_op=None): + self._white_op = white_op + self._black_op = black_op + + def is_allowed(self, node_type): + """ + Tells if a node is white listed or not black listed. + """ + if isinstance(node_type, (list, tuple, set)): + return all(map(self.is_allowed, node_type)) + try: + self.check_white_black_list(node_type) + return True + except RuntimeError: + return False + + def check_white_black_list(self, node_type): + """ + Checks a node type is allowed according to white + and black lists. + """ + if self._white_op: + if node_type not in self._white_op: + raise RuntimeError( + "Operator '{}' is not white listed.".format(node_type)) + if self._black_op: + if node_type in self._black_op: + raise RuntimeError( + "Operator '{}' is black listed.".format(node_type)) + + +class RawModelContainerNode(_WhiteBlackContainer): + """ + This node is the carrier of the model we want to convert. + It provides an abstract layer so that our parsing + framework can work with models generated by different tools. + """ + + def __init__(self, raw_model, white_op=None, black_op=None): + """ + :param raw_model: *scikit-learn* model to convert + """ + _WhiteBlackContainer.__init__( + self, white_op=white_op, black_op=black_op) + self._raw_model = raw_model + + @property + def raw_model(self): + return self._raw_model + + @property + def input_names(self): + """ + This function should return a list of strings. Each string + corresponds to an input variable name. + :return: a list of string + """ + raise NotImplementedError() + + @property + def output_names(self): + """ + This function should return a list of strings. Each string + corresponds to an output variable name. + :return: a list of string + """ + raise NotImplementedError() + + +class SklearnModelContainerNode(RawModelContainerNode): + """ + Main container for one *scikit-learn* model. + Every converter adds nodes to an existing container + which is converted into a *ONNX* graph by an instance of + :class:`Topology `. + """ + + def __init__(self, sklearn_model, white_op=None, black_op=None): + super(SklearnModelContainerNode, self).__init__( + sklearn_model, white_op=white_op, black_op=black_op) + # Scikit-learn models have no input and output specified, + # so we create them and store them in this container. + self._inputs = [] + self._outputs = [] + + @property + def input_names(self): + return [variable.onnx_name for variable in self._inputs] + + @property + def output_names(self): + return [variable.onnx_name for variable in self._outputs] + + def add_input(self, variable): + # The order of adding variables matters. The final model's + # input names are sequentially added as this list + if variable not in self._inputs: + self._inputs.append(variable) + + def add_output(self, variable): + # The order of adding variables matters. The final model's + # output names are sequentially added as this list + if variable not in self._outputs: + self._outputs.append(variable) + + +class ModelComponentContainer(ModelContainer, _WhiteBlackContainer): + """ + In the conversion phase, this class is used to collect all materials + required to build an *ONNX* *GraphProto*, which is encapsulated in a + *ONNX* *ModelProto*. + """ + + def __init__(self, target_opset, options=None, registered_models=None, + white_op=None, black_op=None): + """ + :param target_opset: number, for example, 7 for *ONNX 1.2*, and + 8 for *ONNX 1.3*. + :param options: see :ref:`l-conv-options` + :param registered_models: registered models + :param white_op: white list of ONNX nodes allowed + while converting a pipeline, if empty, all are allowed + :param black_op: black list of ONNX nodes allowed + while converting a pipeline, if empty, none are blacklisted + """ + _WhiteBlackContainer.__init__( + self, white_op=white_op, black_op=black_op) + # Inputs of ONNX graph. They are ValueInfoProto in ONNX. + self.inputs = [] + # Outputs of ONNX graph. They are ValueInfoProto in ONNX. + self.outputs = [] + # ONNX tensors (type: TensorProto). They are initializers of + # ONNX GraphProto. + self.initializers = [] + self.initializers_strings = {} + # Intermediate variables in ONNX computational graph. They are + # ValueInfoProto in ONNX. + self.value_info = [] + # ONNX nodes (type: NodeProto) used to define computation + # structure + self.nodes = [] + # ONNX operators' domain-version pair set. They will be added + # into opset_import field in the final ONNX model. + self.node_domain_version_pair_sets = set() + # The targeted ONNX operator set (referred to as opset) that + # matches the ONNX version. + if isinstance(target_opset, dict): + self.target_opset_all = target_opset + self.target_opset = target_opset.get('', None) + else: + self.target_opset = target_opset + self.target_opset_all = {'': target_opset} + # Additional options given to converters. + self.options = options + # All registered models. + self.registered_models = registered_models + + def __str__(self): + """ + Shows internal information. + """ + rows = [] + if self.inputs: + rows.append("INPUTS") + for inp in self.inputs: + rows.append( + " " + str(inp).replace(" ", "").replace("\n", " ")) + if self.outputs: + rows.append("OUTPUTS") + for out in self.outputs: + rows.append( + " " + str(out).replace(" ", "").replace("\n", " ")) + if self.initializers: + rows.append("INITIALIZERS") + for ini in self.initializers: + rows.append( + " " + str(ini).replace(" ", "").replace("\n", " ")) + if self.value_info: + rows.append("NODES") + for val in self.value_info: + rows.append( + " " + str(val).replace(" ", "").replace("\n", " ")) + if self.nodes: + rows.append("PROTO") + for nod in self.nodes: + rows.append( + " " + str(nod).replace(" ", "").replace("\n", " ")) + return "\n".join(rows) + + def _make_value_info(self, variable): + value_info = ValueInfoProto() + value_info.name = variable.full_name + value_info.type.CopyFrom(variable.type.to_onnx_type()) + if variable.type.doc_string: + value_info.doc_string = variable.type.doc_string + return value_info + + def add_input(self, variable): + """ + Adds our *Variable* object defined _parser.py into the the input + list of the final ONNX model. + + :param variable: The Variable object to be added + """ + self.inputs.append(self._make_value_info(variable)) + + def add_output(self, variable): + """ + Adds our *Variable* object defined *_parser.py* into the the + output list of the final ONNX model. + + :param variable: The Variable object to be added + """ + self.outputs.append(self._make_value_info(variable)) + + def add_options(self, model_id, options): + """ + Adds an option, for example, + ``add_options(id(clr), {'raw_scores': True})`` + tells the converter associated to ``clr`` to + use raw score instead of probabilities. + + :param model_id: class or ``id(instance)`` + :param options: dictionary with the new values + """ + if options is None: + return + if self.options is None: + self.options = {} + if model_id not in self.options: + self.options[model_id] = None + if self.options[model_id] is None: + self.options[model_id] = {} + self.options[model_id].update(options) + + def add_initializer(self, name, onnx_type, shape, content): + """ + Adds a *TensorProto* into the initializer list of the final + ONNX model. + + :param name: Variable name in the produced ONNX model. + :param onnx_type: Element types allowed in ONNX tensor, e.g., + TensorProto.FLOAT and TensorProto.STRING. + :param shape: Tensor shape, a list of integers. + :param content: Flattened tensor values (i.e., a float list + or a float array). + :return: created tensor + """ + sparse_tensor = None + tensor = None + + cached_value = None + if isinstance(content, TensorProto): + tensor = TensorProto() + tensor.data_type = content.data_type + tensor.name = name + tensor.raw_data = content.raw_data + tensor.dims.extend(content.dims) + elif shape is None and isinstance( + content, (np.float32, np.float64, np.int32, + np.int64, float, np.int8, np.uint8, + np.bool_)): + tensor = make_tensor(name, onnx_type, [], [content]) + elif (SparseTensorProto is not None and + isinstance(content, SparseTensorProto)): + raise NotImplementedError("Not implemented yet.") + elif shape is None: + tensor = make_attribute(name, content) + elif isinstance(content, coo_matrix): + if SparseTensorProto is None: + raise RuntimeError( + "Sparse matrices require SparseTensorProto. Update onnx.") + values_tensor = make_tensor( + name + "_v", data_type=onnx_type, + dims=(len(content.data), ), vals=content.data) + indices = [i * content.shape[1] + j + for i, j in zip(content.row, content.col)] + indices_tensor = make_tensor( + name=name + "_i", data_type=TensorProto.INT64, + dims=(len(indices), ), vals=indices) + dense_shape = list(content.shape) + sparse_tensor = make_sparse_tensor( + values_tensor, indices_tensor, dense_shape) + + # cached value: same without names + values_tensor = make_tensor( + "_v", data_type=onnx_type, + dims=(len(content.data), ), vals=content.data) + indices_tensor = make_tensor( + name="_i", data_type=TensorProto.INT64, + dims=(len(indices), ), vals=indices) + cached_value = make_sparse_tensor( + values_tensor, indices_tensor, dense_shape) + + else: + if any(d is None for d in shape): + raise ValueError('Shape of initializer cannot contain None.') + if (hasattr(content, 'dtype') and + content.dtype in (bool, np.bool_)): + content = content.astype(np.int32) + try: + tensor = make_tensor(name, onnx_type, shape, content) + except TypeError as e: + raise TypeError( + "Unable to make a tensor name=%r " + "onnx_type=%r shape=%r content-type=%r." % ( + name, onnx_type, shape, type(content))) from e + + if tensor is not None: + if cached_value is None: + name = tensor.name + tensor.name = "tensor" + content = tensor.SerializeToString() + tensor.name = name + else: + content = cached_value.SerializeToString() + cached_name = self.initializers_strings.get(content, None) + if cached_name is None: + self.initializers_strings[content] = name + self.initializers.append(tensor) + return tensor + + self.add_node( + 'Identity', cached_name, name, op_version=self.target_opset, + name=name + '_op') + return name + + if sparse_tensor is not None: + content = cached_value.SerializeToString() + cached_name = self.initializers_strings.get(content, None) + if cached_name is None: + self.initializers_strings[content] = name + self.add_node( + 'Constant', [], [name], sparse_value=sparse_tensor, + op_version=self.target_opset, name=name + '_op') + return sparse_tensor + + self.add_node( + 'Identity', cached_name, name, op_version=self.target_opset, + name=name + '_op') + return name + + raise RuntimeError( + "Either tensor or sparse_tensor should be defined.") + + def add_value_info(self, variable): + self.value_info.append(self._make_value_info(variable)) + + def _check_operator(self, op_type): + """ + Checks that if *op_type* is one of the operators defined in + :mod:`skl2onnx.common._apply_container`, then it was called + from a function defined in this submodule by looking + into the callstack. The test is enabled for *python >= 3.6*. + """ + if (op_type in _apply_operation_specific and + sys.version_info[:2] >= (3, 6)): + tb = traceback.extract_stack() + operation = [] + fct = _apply_operation_specific[op_type] + skl2 = False + for b in tb: + if "_apply_operation" in b.filename and b.name == fct.__name__: + operation.append(b) + if not skl2 and "skl2onnx" in b.filename: + skl2 = True + if skl2 and len(operation) == 0: + raise RuntimeError( + "Operator '{0}' should be added with function " + "'{1}' in submodule _apply_operation.".format( + op_type, fct.__name__)) + self.check_white_black_list(op_type) + + def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, + name=None, **attrs): + """ + Adds a *NodeProto* into the node list of the final ONNX model. + If the input operator's domain-version information cannot be + found in our domain-version pool (a Python set), we may add it. + + :param op_type: A string (e.g., Pool and Conv) indicating the + type of the NodeProto + :param inputs: A list of strings. They are the input variables' + names of the considered NodeProto + :param outputs: A list of strings. They are the output + variables' names of the considered NodeProto + :param op_domain: The domain name (e.g., ai.onnx.ml) of the + operator we are trying to add. + :param op_version: The version number (e.g., 0 and 1) of the + operator we are trying to add. + :param name: name of the node, this name cannot be empty + :param attrs: A Python dictionary. Keys and values are + attributes' names and attributes' values, + respectively. + """ + if name is None or not isinstance( + name, str) or name == '': + name = "N%d" % len(self.nodes) + existing_names = set(n.name for n in self.nodes) + if name in existing_names: + name += "-N%d" % len(self.nodes) + + if op_domain is None: + op_domain = get_domain() + self._check_operator(op_type) + if op_version is None: + op_version = self._get_op_version(op_domain, op_type) + + if isinstance(inputs, str): + inputs = [inputs] + if isinstance(outputs, str): + outputs = [outputs] + try: + common = set(inputs) & set(outputs) + except TypeError as e: + raise TypeError( + "inputs or outputs are wrong, inputs=%r, outputs=%r, node=%r." + "" % (inputs, outputs, op_type)) from e + if common: + raise RuntimeError( + "inputs and outputs cannot have " + "variables in common {} in node '{}' " + "with name '{}'.".format(common, op_type, name)) + if not isinstance(inputs, list) or not all( + isinstance(s, str) for s in inputs): + type_list = ','.join(list(str(type(s)) for s in inputs)) + raise ValueError('Inputs must be a list of string but get [%s]' + % type_list) + if (not isinstance(outputs, list) or + not all(isinstance(s, str) for s in outputs)): + type_list = ','.join(list(str(type(s)) for s in outputs)) + raise ValueError('Outputs must be a list of string but get [%s]' + % type_list) + upd = {} + dtypes = set() + for k, v in attrs.items(): + if v is None: + raise ValueError( + 'Failed to create ONNX node. Undefined ' + 'attribute pair (%s, %s) found for type %r and ' + 'version %r' % ( + k, v, op_type, op_version)) + if isinstance(v, np.ndarray): + upd[k] = v + dtypes.add(v.dtype) + + if upd: + attrs.update(upd) + if 'dtype' in attrs: + raise RuntimeError("dtype should not be a parameter.") + if len(dtypes) == 0: + dtype = None + elif len(dtypes) == 1: + dtype = list(dtypes)[0] + elif (np.float32 in dtypes and np.float64 in dtypes): + raise RuntimeError( + "Unable to select a dtype among {}.".format(dtypes)) + else: + dtype = None + try: + node = make_node(op_type, inputs, outputs, name=name, + _dtype=dtype, **attrs) + except ValueError as e: + raise ValueError("Unable to create node '{}' with name='{}'." + "".format(op_type, name)) from e + node.domain = op_domain + + self.node_domain_version_pair_sets.add((op_domain, op_version)) + self.nodes.append(node) + if (self.target_opset is not None and + op_version is not None and + op_version > self.target_opset_any_domain(op_domain)): + raise RuntimeError( + "Opset number {} is higher than targeted opsets {} for " + "node '{}' (domain: '{}').".format( + op_version, self.target_opset_all, + node.op_type, op_domain)) + + def target_opset_any_domain(self, domain): + target_opset = self.target_opset_all + if isinstance(target_opset, dict): + if domain in target_opset: + to = target_opset[domain] + else: + to = None + if to is None and domain == '': + to = onnx_opset_version() + if to is None: + smap = C.schema_version_map() + if domain in smap: + to = smap[domain][1] + if to is not None: + return to + # The domain is not registered in onnx, it is probably + # a custom domain. We assume the version is one. + return 1 + return self.target_opset + + @property + def target_opset_onnx(self): + return self.target_opset_any_domain('') + + def _get_op_version(self, domain, op_type): + """ + Determines the highest version of operator + *op_type* below or equal to *target_opset*. + """ + if not hasattr(self, '_op_versions'): + self._build_op_version() + key = domain, op_type + vers = self._op_versions.get(key, None) + if vers is None: + warnings.warn( + "Unable to find operator '{}' in domain '{}' in ONNX, " + "op_version is forced to 1.".format( + op_type, domain)) + vers = [1] + highest = self.target_opset_any_domain(domain) + pos = len(vers) - 1 + while pos >= 0: + if vers[pos] <= highest: + return vers[pos] + pos -= 1 + raise RuntimeError( + "Unable to find a suitable version for operator '{}' " + "in domain '{}'. Available versions: {}.".format( + op_type, domain, vers)) + + def _build_op_version(self): + res = {} + for schema in get_all_schemas_with_history(): + dom = schema.domain + name = schema.name + vers = schema.since_version + if (dom, name) not in res: + res[dom, name] = set() + res[dom, name].add(vers) + self._op_versions = {} + for k, v in res.items(): + self._op_versions[k] = list(sorted(v)) + + def _get_allowed_options(self, model): + if self.registered_models is not None: + if inspect.isfunction(model): + if model not in self.registered_models['aliases']: + return None + alias = self.registered_models['aliases'][model] + elif hasattr(model, 'alias'): + alias = model.alias + else: + if type(model) not in self.registered_models['aliases']: + return {} + alias = self.registered_models['aliases'][type(model)] + conv = self.registered_models['conv'][alias] + allowed = conv.get_allowed_options() + if allowed is None: + return {} + return allowed + clname = (str(model) if inspect.isfunction(model) + else model.__class__.__name__) + raise NotImplementedError( + "No registered models, no known allowed options " + "for model '{}'.".format(clname)) + + def validate_options(self, operator): + """ + Validates every operator allows the options + given by the user at converter time + for an operator. + """ + skl_op = operator.raw_operator + self.get_options(skl_op) + + def get_options(self, model, default_values=None, fail=True): + """ + Returns additional options for a model. + It first looks by class then by id (``id(model)``). + :param model: model being converted + :param default_values: default options (it is modified by + the function) + :param fail: fails if options not found + :return: dictionary + """ + return _build_options( + model, self.options, default_values, + self._get_allowed_options(model), fail=fail) + + def has_options(self, model, option_name): + """ + Tells if a model allows one specific options. + + :param model: model being converted + :return: boolean + """ + opts = self._get_allowed_options(model) + return option_name in opts + + def ensure_topological_order(self): + """ + Ensures and modifies the order of nodes to have + a topological order (every node in the list + can only be an input for a node later in this list). + The function raises an exception if a cycle is detected. + """ + order = {} + for inp in self.inputs: + name = inp.name + order[name] = 0 + for inp in self.initializers: + name = inp.name + order[name] = 0 + n_iter = 0 + while n_iter < len(self.nodes) * 2: + n_iter += 1 + missing_names = set() + missing_ops = [] + for node in self.nodes: + maxi = 0 + for name in node.input: + if name in order: + maxi = max(maxi, order[name]) + else: + maxi = None + missing_names.add(name) + break + if maxi is None: + missing_ops.append(node) + continue + key = id(node) + if key in order: + continue + maxi += 1 + order[key] = maxi + maxi += 1 + for name in node.output: + if name in order: + raise RuntimeError( + "Unable to sort a node (cycle). An output was " + "already ordered %r (iteration=%r)." % ( + name, n_iter)) + order[name] = maxi + if len(missing_names) == 0: + continue + + if len(missing_ops) > 0: + def nstr(name): + if name in order: + return "%s#%d" % (name, order[name]) + return name + rows = ["%s(%s) -> [%s]" % ( + n.name or n.op_type, + ', '.join(map(nstr, n.input)), + ', '.join(n.output)) + for n in missing_ops] + rows.insert(0, "") + rows.append("--") + rows.append("--all-nodes--") + rows.append("--") + rows.extend("%s(%s) -> [%s]" % ( + n.name or n.op_type, + ', '.join(map(nstr, n.input)), + ', '.join(n.output)) + for n in self.nodes) + raise RuntimeError( + "After %d iterations for %d nodes, still unable " + "to sort names %r. The graph may be disconnected. " + "List of operators: %s" % ( + n_iter, len(self.nodes), missing_names, + "\n".join(rows))) + + # Update order + topo = [(order[id(node)], str(id(node))) for node in self.nodes] + topo.sort() + map_nodes = {str(id(node)): node for node in self.nodes} + self.nodes = [map_nodes[_[1]] for _ in topo] diff --git a/tests/test_algebra_onnx_operators_sub_estimator.py b/tests/test_algebra_onnx_operators_sub_estimator.py index 704d6e498..ab23e6ede 100644 --- a/tests/test_algebra_onnx_operators_sub_estimator.py +++ b/tests/test_algebra_onnx_operators_sub_estimator.py @@ -305,5 +305,5 @@ def test_sub_sub_estimator(self): if __name__ == "__main__": - TestOnnxOperatorSubEstimator().test_sub_sub_estimator() + # TestOnnxOperatorSubEstimator().test_sub_sub_estimator() unittest.main() From 7dab59a236d311d09ccc79de7b3eb9f3ef300f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 27 Jul 2021 19:06:17 +0200 Subject: [PATCH 3/6] type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/algebra/graph_state.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/skl2onnx/algebra/graph_state.py b/skl2onnx/algebra/graph_state.py index 690adbff8..cd4cb7836 100644 --- a/skl2onnx/algebra/graph_state.py +++ b/skl2onnx/algebra/graph_state.py @@ -514,7 +514,10 @@ def run(self): # Registers the variables into scope. self.computed_outputs_ = [] for name, kind in computed_outputs: - var = self.scope.declare_local_variable(name, kind) - var.onnx_name = name # name already comes from scope.get_unique_variable_name - var.is_fed = True - self.computed_outputs_.append(var) + if isinstance(kind, str): + self.computed_outputs_.append((name, kind)) + else: + var = self.scope.declare_local_variable(name, kind) + var.onnx_name = name # name already comes from scope.get_unique_variable_name + var.is_fed = True + self.computed_outputs_.append(var) From e4c1754098727dc0c0793540acc6cdfadf01d035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Tue, 27 Jul 2021 19:34:30 +0200 Subject: [PATCH 4/6] fix scaler converter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/algebra/graph_state.py | 4 ++- skl2onnx/operator_converters/scaler_op.py | 27 ++++++++++++++----- tests/test_algebra_onnx_operators.py | 20 +++++++------- ...st_algebra_onnx_operators_sub_estimator.py | 3 +-- tests/test_algebra_to_onnx.py | 8 +++++- 5 files changed, 43 insertions(+), 19 deletions(-) diff --git a/skl2onnx/algebra/graph_state.py b/skl2onnx/algebra/graph_state.py index cd4cb7836..26e57a225 100644 --- a/skl2onnx/algebra/graph_state.py +++ b/skl2onnx/algebra/graph_state.py @@ -518,6 +518,8 @@ def run(self): self.computed_outputs_.append((name, kind)) else: var = self.scope.declare_local_variable(name, kind) - var.onnx_name = name # name already comes from scope.get_unique_variable_name + # name already comes from + # scope.get_unique_variable_name + var.onnx_name = name var.is_fed = True self.computed_outputs_.append(var) diff --git a/skl2onnx/operator_converters/scaler_op.py b/skl2onnx/operator_converters/scaler_op.py index 1a04040de..dae35d10c 100644 --- a/skl2onnx/operator_converters/scaler_op.py +++ b/skl2onnx/operator_converters/scaler_op.py @@ -32,25 +32,33 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, if isinstance(op, StandardScaler): C = (operator.inputs[0].type.shape[1] if len(operator.inputs[0].type.shape) == 2 else 1) - attrs['offset'] = op.mean_ if op.with_mean else [0.0] * C - attrs['scale'] = 1.0 / op.scale_ if op.with_std else [1.0] * C + attrs['offset'] = ( + op.mean_ if op.with_mean else + np.array([0.0] * C, dtype=np.float32)) + attrs['scale'] = ( + 1.0 / op.scale_ if op.with_std else + np.array([1.0] * C, dtype=np.float32)) inv_scale = op.scale_ if op.with_std else None elif isinstance(op, RobustScaler): C = (operator.inputs[0].type.shape[1] if len(operator.inputs[0].type.shape) == 2 else 1) - attrs['offset'] = op.center_ if op.with_centering else [0.0] * C - attrs['scale'] = 1.0 / op.scale_ if op.with_scaling else [1.0] * C + attrs['offset'] = ( + op.center_ if op.with_centering else + np.array([0.0] * C, dtype=np.float32)) + attrs['scale'] = ( + 1.0 / op.scale_ if op.with_scaling else + np.array([1.0] * C, dtype=np.float32)) inv_scale = op.scale_ if op.with_scaling else None elif isinstance(op, MinMaxScaler): attrs['scale'] = op.scale_ # Add 1e-8 to avoid divided by 0 - attrs['offset'] = -op.min_/(op.scale_ + 1e-8) + attrs['offset'] = -op.min_ / (op.scale_ + 1e-8) inv_scale = None elif isinstance(op, MaxAbsScaler): C = (operator.inputs[0].type.shape[1] if len(operator.inputs[0].type.shape) == 2 else 1) attrs['scale'] = 1.0 / op.scale_ - attrs['offset'] = [0.] * C + attrs['offset'] = np.array([0.] * C, dtype=np.float32) inv_scale = op.scale_ else: raise ValueError('Only scikit-learn StandardScaler and RobustScaler ' @@ -107,6 +115,13 @@ def convert_sklearn_scaler(scope: Scope, operator: Operator, cast.add_to(scope, container) return + if attrs['offset'].size != attrs['scale'].size: + # Scaler does not accept different size for offset and scale. + size = max(attrs['offset'].size, attrs['scale'].size) + ones = np.ones(size, dtype=attrs['offset'].dtype) + attrs['offset'] = attrs['offset'] * ones + attrs['scale'] = attrs['scale'] * ones + container.add_node( op_type, feature_name, operator.outputs[0].full_name, op_domain='ai.onnx.ml', **attrs) diff --git a/tests/test_algebra_onnx_operators.py b/tests/test_algebra_onnx_operators.py index a884fcc6a..a7dca7beb 100644 --- a/tests/test_algebra_onnx_operators.py +++ b/tests/test_algebra_onnx_operators.py @@ -7,12 +7,16 @@ import numpy as np from numpy.testing import assert_almost_equal import onnx +from onnx import ( + helper, TensorProto, load_model, + __version__ as onnx__version__) from sklearn.base import BaseEstimator, TransformerMixin from sklearn.cluster import KMeans from sklearn.datasets import load_iris from sklearn.utils.extmath import row_norms from onnxruntime import InferenceSession from skl2onnx import convert_sklearn +from skl2onnx.common._topology import Variable from skl2onnx.common.data_types import ( FloatTensorType, guess_numpy_type, DoubleTensorType) from skl2onnx.algebra.onnx_operator import OnnxOperator @@ -21,12 +25,7 @@ OnnxReduceSumSquare, OnnxGemm, OnnxAdd, OnnxArgMin, OnnxSqrt, OnnxArrayFeatureExtractor, OnnxMul, - OnnxPad, OnnxBatchNormalization -) -from onnx import ( - helper, TensorProto, load_model, - __version__ as onnx__version__ -) + OnnxPad, OnnxBatchNormalization) from test_utils import dump_data_and_model, TARGET_OPSET @@ -226,9 +225,12 @@ def test_onnx_reversed_order(self): onnx2 = model_def.SerializeToString() self.assertIsInstance(onx.outputs, list) self.assertEqual(len(onx.outputs), 1) - self.assertIsInstance(onx.outputs[0], tuple) - self.assertEqual(len(onx.outputs[0]), 2) - self.assertIsInstance(onx.outputs[0][1], DoubleTensorType) + self.assertIsInstance(onx.outputs[0], (Variable, tuple)) + if isinstance(onx.outputs[0], tuple): + self.assertEqual(len(onx.outputs[0]), 2) + self.assertIsInstance(onx.outputs[0][1], DoubleTensorType) + else: + self.assertIsInstance(onx.outputs[0].type, DoubleTensorType) # There should be 2 outputs here, bug in ONNX? self.assertEqual(len(model_def.graph.output), 1) reload = load_model(BytesIO(onnx2)) diff --git a/tests/test_algebra_onnx_operators_sub_estimator.py b/tests/test_algebra_onnx_operators_sub_estimator.py index ab23e6ede..e66b2337d 100644 --- a/tests/test_algebra_onnx_operators_sub_estimator.py +++ b/tests/test_algebra_onnx_operators_sub_estimator.py @@ -19,7 +19,7 @@ from skl2onnx import to_onnx, get_model_alias from skl2onnx.proto import onnx_proto from skl2onnx.common.data_types import ( - FloatTensorType, Int64TensorType, guess_numpy_type) + FloatTensorType, Int64TensorType) from skl2onnx.algebra.onnx_operator import OnnxSubEstimator from test_utils import TARGET_OPSET @@ -305,5 +305,4 @@ def test_sub_sub_estimator(self): if __name__ == "__main__": - # TestOnnxOperatorSubEstimator().test_sub_sub_estimator() unittest.main() diff --git a/tests/test_algebra_to_onnx.py b/tests/test_algebra_to_onnx.py index be87601ca..d4bf0b488 100644 --- a/tests/test_algebra_to_onnx.py +++ b/tests/test_algebra_to_onnx.py @@ -83,7 +83,13 @@ def generate_onnx_graph(opv): [('Y', FloatTensorType(shape=[]))]] for i, node in enumerate(nodes): shape = node.get_output_type_inference(inputs) - self.assertEqual(str(expected[i]), str(shape)) + self.assertEqual(len(shape), 1) + if isinstance(shape[0], tuple): + self.assertEqual(str(expected[i]), str(shape)) + else: + self.assertEqual( + str(expected[i]), + str([(shape[0].onnx_name, shape[0].type)])) inputs = shape def common_test_sub_graph(self, first_input, model, options=None, From ed1fdf7f0a70d3508203a9b3417ca4b859c89a8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Wed, 28 Jul 2021 10:14:12 +0200 Subject: [PATCH 5/6] decimal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- tests/test_algebra_onnx_operators_sub_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_algebra_onnx_operators_sub_estimator.py b/tests/test_algebra_onnx_operators_sub_estimator.py index e66b2337d..5fcd36e63 100644 --- a/tests/test_algebra_onnx_operators_sub_estimator.py +++ b/tests/test_algebra_onnx_operators_sub_estimator.py @@ -301,7 +301,7 @@ def test_sub_sub_estimator(self): model, X32, target_opset=TARGET_OPSET) sess = InferenceSession(model_onnx.SerializeToString()) res = sess.run(None, {'X': X32}) - assert_almost_equal(model.transform(X32), res[0]) + assert_almost_equal(model.transform(X32), res[0], decimal=5) if __name__ == "__main__": From 4b29fb4b7b9537bc5d533e001ff0f2baa99cc501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 30 Jul 2021 16:58:04 +0200 Subject: [PATCH 6/6] lint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: xavier dupré --- skl2onnx/common/_container.py | 1530 ++++++++++++++++----------------- 1 file changed, 765 insertions(+), 765 deletions(-) diff --git a/skl2onnx/common/_container.py b/skl2onnx/common/_container.py index 1fc00b995..40233f387 100644 --- a/skl2onnx/common/_container.py +++ b/skl2onnx/common/_container.py @@ -1,765 +1,765 @@ -# SPDX-License-Identifier: Apache-2.0 - - -import inspect -import re -import sys -import traceback -import warnings -import numpy as np -from scipy.sparse import coo_matrix -from onnx.defs import onnx_opset_version, get_all_schemas_with_history -import onnx.onnx_cpp2py_export.defs as C -from onnxconverter_common.onnx_ops import __dict__ as dict_apply_operation -from ..proto import TensorProto -from ..proto.onnx_helper_modified import ( - make_node, ValueInfoProto, make_tensor, make_attribute -) -try: - from ..proto import SparseTensorProto - from ..proto.onnx_helper_modified import make_sparse_tensor -except ImportError: - # onnx is too old. - SparseTensorProto = None - make_sparse_tensor = None -from .interface import ModelContainer -from .utils import get_domain - - -def _get_operation_list(): - """ - Investigates this module to extract all ONNX functions - which needs to be converted with these functions. - """ - regs = [re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " - "\\[?input_name"), - re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " - "\\[\\]"), - re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " - "inputs"), - re.compile("scope, '([A-Z][a-zA-Z0-9]*)', \\[?input_name"), - re.compile("op_type = '([A-Z][a-zA-Z0-9]*)'")] - res = {} - for k, v in dict_apply_operation.items(): - if k.startswith("apply_") and callable(v): - found = None - source = inspect.getsource(v) - for reg in regs: - g = reg.search(source) - if g: - found = g.groups()[0] - break - if found is None: - continue - res[found] = v - return res - - -def _build_options(model, defined_options, default_values, - allowed_options, fail): - opts = {} if default_values is None else default_values - if defined_options is not None: - opts.update(defined_options.get(type(model), {})) - opts.update(defined_options.get(id(model), {})) - if allowed_options not in (None, 'passthrough'): - for k, v in opts.items(): - if k not in allowed_options: - if fail: - raise NameError( - "Option '{}' not in {} for class '{}'.".format( - k, list(sorted(allowed_options)), - model.__class__.__name__)) - return None - allowed = allowed_options[k] - if allowed is not None and v not in allowed and v is not None: - raise ValueError( - "Unexpected value [{!r}] for option '{}'" - " (it must be in {}) for model '{}'.".format( - v, k, allowed, model.__class__.__name__)) - elif fail and len(opts) != 0 and allowed_options != 'passthrough': - raise RuntimeError( - "Options {} are not registerd for model '{}'.".format( - list(sorted(opts)), model.__class__.__name__)) - return opts - - -_apply_operation_specific = _get_operation_list() - - -class _WhiteBlackContainer: - - def __init__(self, white_op=None, black_op=None): - self._white_op = white_op - self._black_op = black_op - - def is_allowed(self, node_type): - """ - Tells if a node is white listed or not black listed. - """ - if isinstance(node_type, (list, tuple, set)): - return all(map(self.is_allowed, node_type)) - try: - self.check_white_black_list(node_type) - return True - except RuntimeError: - return False - - def check_white_black_list(self, node_type): - """ - Checks a node type is allowed according to white - and black lists. - """ - if self._white_op: - if node_type not in self._white_op: - raise RuntimeError( - "Operator '{}' is not white listed.".format(node_type)) - if self._black_op: - if node_type in self._black_op: - raise RuntimeError( - "Operator '{}' is black listed.".format(node_type)) - - -class RawModelContainerNode(_WhiteBlackContainer): - """ - This node is the carrier of the model we want to convert. - It provides an abstract layer so that our parsing - framework can work with models generated by different tools. - """ - - def __init__(self, raw_model, white_op=None, black_op=None): - """ - :param raw_model: *scikit-learn* model to convert - """ - _WhiteBlackContainer.__init__( - self, white_op=white_op, black_op=black_op) - self._raw_model = raw_model - - @property - def raw_model(self): - return self._raw_model - - @property - def input_names(self): - """ - This function should return a list of strings. Each string - corresponds to an input variable name. - :return: a list of string - """ - raise NotImplementedError() - - @property - def output_names(self): - """ - This function should return a list of strings. Each string - corresponds to an output variable name. - :return: a list of string - """ - raise NotImplementedError() - - -class SklearnModelContainerNode(RawModelContainerNode): - """ - Main container for one *scikit-learn* model. - Every converter adds nodes to an existing container - which is converted into a *ONNX* graph by an instance of - :class:`Topology `. - """ - - def __init__(self, sklearn_model, white_op=None, black_op=None): - super(SklearnModelContainerNode, self).__init__( - sklearn_model, white_op=white_op, black_op=black_op) - # Scikit-learn models have no input and output specified, - # so we create them and store them in this container. - self._inputs = [] - self._outputs = [] - - @property - def input_names(self): - return [variable.onnx_name for variable in self._inputs] - - @property - def output_names(self): - return [variable.onnx_name for variable in self._outputs] - - def add_input(self, variable): - # The order of adding variables matters. The final model's - # input names are sequentially added as this list - if variable not in self._inputs: - self._inputs.append(variable) - - def add_output(self, variable): - # The order of adding variables matters. The final model's - # output names are sequentially added as this list - if variable not in self._outputs: - self._outputs.append(variable) - - -class ModelComponentContainer(ModelContainer, _WhiteBlackContainer): - """ - In the conversion phase, this class is used to collect all materials - required to build an *ONNX* *GraphProto*, which is encapsulated in a - *ONNX* *ModelProto*. - """ - - def __init__(self, target_opset, options=None, registered_models=None, - white_op=None, black_op=None): - """ - :param target_opset: number, for example, 7 for *ONNX 1.2*, and - 8 for *ONNX 1.3*. - :param options: see :ref:`l-conv-options` - :param registered_models: registered models - :param white_op: white list of ONNX nodes allowed - while converting a pipeline, if empty, all are allowed - :param black_op: black list of ONNX nodes allowed - while converting a pipeline, if empty, none are blacklisted - """ - _WhiteBlackContainer.__init__( - self, white_op=white_op, black_op=black_op) - # Inputs of ONNX graph. They are ValueInfoProto in ONNX. - self.inputs = [] - # Outputs of ONNX graph. They are ValueInfoProto in ONNX. - self.outputs = [] - # ONNX tensors (type: TensorProto). They are initializers of - # ONNX GraphProto. - self.initializers = [] - self.initializers_strings = {} - # Intermediate variables in ONNX computational graph. They are - # ValueInfoProto in ONNX. - self.value_info = [] - # ONNX nodes (type: NodeProto) used to define computation - # structure - self.nodes = [] - # ONNX operators' domain-version pair set. They will be added - # into opset_import field in the final ONNX model. - self.node_domain_version_pair_sets = set() - # The targeted ONNX operator set (referred to as opset) that - # matches the ONNX version. - if isinstance(target_opset, dict): - self.target_opset_all = target_opset - self.target_opset = target_opset.get('', None) - else: - self.target_opset = target_opset - self.target_opset_all = {'': target_opset} - # Additional options given to converters. - self.options = options - # All registered models. - self.registered_models = registered_models - - def __str__(self): - """ - Shows internal information. - """ - rows = [] - if self.inputs: - rows.append("INPUTS") - for inp in self.inputs: - rows.append( - " " + str(inp).replace(" ", "").replace("\n", " ")) - if self.outputs: - rows.append("OUTPUTS") - for out in self.outputs: - rows.append( - " " + str(out).replace(" ", "").replace("\n", " ")) - if self.initializers: - rows.append("INITIALIZERS") - for ini in self.initializers: - rows.append( - " " + str(ini).replace(" ", "").replace("\n", " ")) - if self.value_info: - rows.append("NODES") - for val in self.value_info: - rows.append( - " " + str(val).replace(" ", "").replace("\n", " ")) - if self.nodes: - rows.append("PROTO") - for nod in self.nodes: - rows.append( - " " + str(nod).replace(" ", "").replace("\n", " ")) - return "\n".join(rows) - - def _make_value_info(self, variable): - value_info = ValueInfoProto() - value_info.name = variable.full_name - value_info.type.CopyFrom(variable.type.to_onnx_type()) - if variable.type.doc_string: - value_info.doc_string = variable.type.doc_string - return value_info - - def add_input(self, variable): - """ - Adds our *Variable* object defined _parser.py into the the input - list of the final ONNX model. - - :param variable: The Variable object to be added - """ - self.inputs.append(self._make_value_info(variable)) - - def add_output(self, variable): - """ - Adds our *Variable* object defined *_parser.py* into the the - output list of the final ONNX model. - - :param variable: The Variable object to be added - """ - self.outputs.append(self._make_value_info(variable)) - - def add_options(self, model_id, options): - """ - Adds an option, for example, - ``add_options(id(clr), {'raw_scores': True})`` - tells the converter associated to ``clr`` to - use raw score instead of probabilities. - - :param model_id: class or ``id(instance)`` - :param options: dictionary with the new values - """ - if options is None: - return - if self.options is None: - self.options = {} - if model_id not in self.options: - self.options[model_id] = None - if self.options[model_id] is None: - self.options[model_id] = {} - self.options[model_id].update(options) - - def add_initializer(self, name, onnx_type, shape, content): - """ - Adds a *TensorProto* into the initializer list of the final - ONNX model. - - :param name: Variable name in the produced ONNX model. - :param onnx_type: Element types allowed in ONNX tensor, e.g., - TensorProto.FLOAT and TensorProto.STRING. - :param shape: Tensor shape, a list of integers. - :param content: Flattened tensor values (i.e., a float list - or a float array). - :return: created tensor - """ - sparse_tensor = None - tensor = None - - cached_value = None - if isinstance(content, TensorProto): - tensor = TensorProto() - tensor.data_type = content.data_type - tensor.name = name - tensor.raw_data = content.raw_data - tensor.dims.extend(content.dims) - elif shape is None and isinstance( - content, (np.float32, np.float64, np.int32, - np.int64, float, np.int8, np.uint8, - np.bool_)): - tensor = make_tensor(name, onnx_type, [], [content]) - elif (SparseTensorProto is not None and - isinstance(content, SparseTensorProto)): - raise NotImplementedError("Not implemented yet.") - elif shape is None: - tensor = make_attribute(name, content) - elif isinstance(content, coo_matrix): - if SparseTensorProto is None: - raise RuntimeError( - "Sparse matrices require SparseTensorProto. Update onnx.") - values_tensor = make_tensor( - name + "_v", data_type=onnx_type, - dims=(len(content.data), ), vals=content.data) - indices = [i * content.shape[1] + j - for i, j in zip(content.row, content.col)] - indices_tensor = make_tensor( - name=name + "_i", data_type=TensorProto.INT64, - dims=(len(indices), ), vals=indices) - dense_shape = list(content.shape) - sparse_tensor = make_sparse_tensor( - values_tensor, indices_tensor, dense_shape) - - # cached value: same without names - values_tensor = make_tensor( - "_v", data_type=onnx_type, - dims=(len(content.data), ), vals=content.data) - indices_tensor = make_tensor( - name="_i", data_type=TensorProto.INT64, - dims=(len(indices), ), vals=indices) - cached_value = make_sparse_tensor( - values_tensor, indices_tensor, dense_shape) - - else: - if any(d is None for d in shape): - raise ValueError('Shape of initializer cannot contain None.') - if (hasattr(content, 'dtype') and - content.dtype in (bool, np.bool_)): - content = content.astype(np.int32) - try: - tensor = make_tensor(name, onnx_type, shape, content) - except TypeError as e: - raise TypeError( - "Unable to make a tensor name=%r " - "onnx_type=%r shape=%r content-type=%r." % ( - name, onnx_type, shape, type(content))) from e - - if tensor is not None: - if cached_value is None: - name = tensor.name - tensor.name = "tensor" - content = tensor.SerializeToString() - tensor.name = name - else: - content = cached_value.SerializeToString() - cached_name = self.initializers_strings.get(content, None) - if cached_name is None: - self.initializers_strings[content] = name - self.initializers.append(tensor) - return tensor - - self.add_node( - 'Identity', cached_name, name, op_version=self.target_opset, - name=name + '_op') - return name - - if sparse_tensor is not None: - content = cached_value.SerializeToString() - cached_name = self.initializers_strings.get(content, None) - if cached_name is None: - self.initializers_strings[content] = name - self.add_node( - 'Constant', [], [name], sparse_value=sparse_tensor, - op_version=self.target_opset, name=name + '_op') - return sparse_tensor - - self.add_node( - 'Identity', cached_name, name, op_version=self.target_opset, - name=name + '_op') - return name - - raise RuntimeError( - "Either tensor or sparse_tensor should be defined.") - - def add_value_info(self, variable): - self.value_info.append(self._make_value_info(variable)) - - def _check_operator(self, op_type): - """ - Checks that if *op_type* is one of the operators defined in - :mod:`skl2onnx.common._apply_container`, then it was called - from a function defined in this submodule by looking - into the callstack. The test is enabled for *python >= 3.6*. - """ - if (op_type in _apply_operation_specific and - sys.version_info[:2] >= (3, 6)): - tb = traceback.extract_stack() - operation = [] - fct = _apply_operation_specific[op_type] - skl2 = False - for b in tb: - if "_apply_operation" in b.filename and b.name == fct.__name__: - operation.append(b) - if not skl2 and "skl2onnx" in b.filename: - skl2 = True - if skl2 and len(operation) == 0: - raise RuntimeError( - "Operator '{0}' should be added with function " - "'{1}' in submodule _apply_operation.".format( - op_type, fct.__name__)) - self.check_white_black_list(op_type) - - def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, - name=None, **attrs): - """ - Adds a *NodeProto* into the node list of the final ONNX model. - If the input operator's domain-version information cannot be - found in our domain-version pool (a Python set), we may add it. - - :param op_type: A string (e.g., Pool and Conv) indicating the - type of the NodeProto - :param inputs: A list of strings. They are the input variables' - names of the considered NodeProto - :param outputs: A list of strings. They are the output - variables' names of the considered NodeProto - :param op_domain: The domain name (e.g., ai.onnx.ml) of the - operator we are trying to add. - :param op_version: The version number (e.g., 0 and 1) of the - operator we are trying to add. - :param name: name of the node, this name cannot be empty - :param attrs: A Python dictionary. Keys and values are - attributes' names and attributes' values, - respectively. - """ - if name is None or not isinstance( - name, str) or name == '': - name = "N%d" % len(self.nodes) - existing_names = set(n.name for n in self.nodes) - if name in existing_names: - name += "-N%d" % len(self.nodes) - - if op_domain is None: - op_domain = get_domain() - self._check_operator(op_type) - if op_version is None: - op_version = self._get_op_version(op_domain, op_type) - - if isinstance(inputs, str): - inputs = [inputs] - if isinstance(outputs, str): - outputs = [outputs] - try: - common = set(inputs) & set(outputs) - except TypeError as e: - raise TypeError( - "inputs or outputs are wrong, inputs=%r, outputs=%r, node=%r." - "" % (inputs, outputs, op_type)) from e - if common: - raise RuntimeError( - "inputs and outputs cannot have " - "variables in common {} in node '{}' " - "with name '{}'.".format(common, op_type, name)) - if not isinstance(inputs, list) or not all( - isinstance(s, str) for s in inputs): - type_list = ','.join(list(str(type(s)) for s in inputs)) - raise ValueError('Inputs must be a list of string but get [%s]' - % type_list) - if (not isinstance(outputs, list) or - not all(isinstance(s, str) for s in outputs)): - type_list = ','.join(list(str(type(s)) for s in outputs)) - raise ValueError('Outputs must be a list of string but get [%s]' - % type_list) - upd = {} - dtypes = set() - for k, v in attrs.items(): - if v is None: - raise ValueError( - 'Failed to create ONNX node. Undefined ' - 'attribute pair (%s, %s) found for type %r and ' - 'version %r' % ( - k, v, op_type, op_version)) - if isinstance(v, np.ndarray): - upd[k] = v - dtypes.add(v.dtype) - - if upd: - attrs.update(upd) - if 'dtype' in attrs: - raise RuntimeError("dtype should not be a parameter.") - if len(dtypes) == 0: - dtype = None - elif len(dtypes) == 1: - dtype = list(dtypes)[0] - elif (np.float32 in dtypes and np.float64 in dtypes): - raise RuntimeError( - "Unable to select a dtype among {}.".format(dtypes)) - else: - dtype = None - try: - node = make_node(op_type, inputs, outputs, name=name, - _dtype=dtype, **attrs) - except ValueError as e: - raise ValueError("Unable to create node '{}' with name='{}'." - "".format(op_type, name)) from e - node.domain = op_domain - - self.node_domain_version_pair_sets.add((op_domain, op_version)) - self.nodes.append(node) - if (self.target_opset is not None and - op_version is not None and - op_version > self.target_opset_any_domain(op_domain)): - raise RuntimeError( - "Opset number {} is higher than targeted opsets {} for " - "node '{}' (domain: '{}').".format( - op_version, self.target_opset_all, - node.op_type, op_domain)) - - def target_opset_any_domain(self, domain): - target_opset = self.target_opset_all - if isinstance(target_opset, dict): - if domain in target_opset: - to = target_opset[domain] - else: - to = None - if to is None and domain == '': - to = onnx_opset_version() - if to is None: - smap = C.schema_version_map() - if domain in smap: - to = smap[domain][1] - if to is not None: - return to - # The domain is not registered in onnx, it is probably - # a custom domain. We assume the version is one. - return 1 - return self.target_opset - - @property - def target_opset_onnx(self): - return self.target_opset_any_domain('') - - def _get_op_version(self, domain, op_type): - """ - Determines the highest version of operator - *op_type* below or equal to *target_opset*. - """ - if not hasattr(self, '_op_versions'): - self._build_op_version() - key = domain, op_type - vers = self._op_versions.get(key, None) - if vers is None: - warnings.warn( - "Unable to find operator '{}' in domain '{}' in ONNX, " - "op_version is forced to 1.".format( - op_type, domain)) - vers = [1] - highest = self.target_opset_any_domain(domain) - pos = len(vers) - 1 - while pos >= 0: - if vers[pos] <= highest: - return vers[pos] - pos -= 1 - raise RuntimeError( - "Unable to find a suitable version for operator '{}' " - "in domain '{}'. Available versions: {}.".format( - op_type, domain, vers)) - - def _build_op_version(self): - res = {} - for schema in get_all_schemas_with_history(): - dom = schema.domain - name = schema.name - vers = schema.since_version - if (dom, name) not in res: - res[dom, name] = set() - res[dom, name].add(vers) - self._op_versions = {} - for k, v in res.items(): - self._op_versions[k] = list(sorted(v)) - - def _get_allowed_options(self, model): - if self.registered_models is not None: - if inspect.isfunction(model): - if model not in self.registered_models['aliases']: - return None - alias = self.registered_models['aliases'][model] - elif hasattr(model, 'alias'): - alias = model.alias - else: - if type(model) not in self.registered_models['aliases']: - return {} - alias = self.registered_models['aliases'][type(model)] - conv = self.registered_models['conv'][alias] - allowed = conv.get_allowed_options() - if allowed is None: - return {} - return allowed - clname = (str(model) if inspect.isfunction(model) - else model.__class__.__name__) - raise NotImplementedError( - "No registered models, no known allowed options " - "for model '{}'.".format(clname)) - - def validate_options(self, operator): - """ - Validates every operator allows the options - given by the user at converter time - for an operator. - """ - skl_op = operator.raw_operator - self.get_options(skl_op) - - def get_options(self, model, default_values=None, fail=True): - """ - Returns additional options for a model. - It first looks by class then by id (``id(model)``). - :param model: model being converted - :param default_values: default options (it is modified by - the function) - :param fail: fails if options not found - :return: dictionary - """ - return _build_options( - model, self.options, default_values, - self._get_allowed_options(model), fail=fail) - - def has_options(self, model, option_name): - """ - Tells if a model allows one specific options. - - :param model: model being converted - :return: boolean - """ - opts = self._get_allowed_options(model) - return option_name in opts - - def ensure_topological_order(self): - """ - Ensures and modifies the order of nodes to have - a topological order (every node in the list - can only be an input for a node later in this list). - The function raises an exception if a cycle is detected. - """ - order = {} - for inp in self.inputs: - name = inp.name - order[name] = 0 - for inp in self.initializers: - name = inp.name - order[name] = 0 - n_iter = 0 - while n_iter < len(self.nodes) * 2: - n_iter += 1 - missing_names = set() - missing_ops = [] - for node in self.nodes: - maxi = 0 - for name in node.input: - if name in order: - maxi = max(maxi, order[name]) - else: - maxi = None - missing_names.add(name) - break - if maxi is None: - missing_ops.append(node) - continue - key = id(node) - if key in order: - continue - maxi += 1 - order[key] = maxi - maxi += 1 - for name in node.output: - if name in order: - raise RuntimeError( - "Unable to sort a node (cycle). An output was " - "already ordered %r (iteration=%r)." % ( - name, n_iter)) - order[name] = maxi - if len(missing_names) == 0: - continue - - if len(missing_ops) > 0: - def nstr(name): - if name in order: - return "%s#%d" % (name, order[name]) - return name - rows = ["%s(%s) -> [%s]" % ( - n.name or n.op_type, - ', '.join(map(nstr, n.input)), - ', '.join(n.output)) - for n in missing_ops] - rows.insert(0, "") - rows.append("--") - rows.append("--all-nodes--") - rows.append("--") - rows.extend("%s(%s) -> [%s]" % ( - n.name or n.op_type, - ', '.join(map(nstr, n.input)), - ', '.join(n.output)) - for n in self.nodes) - raise RuntimeError( - "After %d iterations for %d nodes, still unable " - "to sort names %r. The graph may be disconnected. " - "List of operators: %s" % ( - n_iter, len(self.nodes), missing_names, - "\n".join(rows))) - - # Update order - topo = [(order[id(node)], str(id(node))) for node in self.nodes] - topo.sort() - map_nodes = {str(id(node)): node for node in self.nodes} - self.nodes = [map_nodes[_[1]] for _ in topo] +# SPDX-License-Identifier: Apache-2.0 + + +import inspect +import re +import sys +import traceback +import warnings +import numpy as np +from scipy.sparse import coo_matrix +from onnx.defs import onnx_opset_version, get_all_schemas_with_history +import onnx.onnx_cpp2py_export.defs as C +from onnxconverter_common.onnx_ops import __dict__ as dict_apply_operation +from ..proto import TensorProto +from ..proto.onnx_helper_modified import ( + make_node, ValueInfoProto, make_tensor, make_attribute +) +try: + from ..proto import SparseTensorProto + from ..proto.onnx_helper_modified import make_sparse_tensor +except ImportError: + # onnx is too old. + SparseTensorProto = None + make_sparse_tensor = None +from .interface import ModelContainer +from .utils import get_domain + + +def _get_operation_list(): + """ + Investigates this module to extract all ONNX functions + which needs to be converted with these functions. + """ + regs = [re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " + "\\[?input_name"), + re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " + "\\[\\]"), + re.compile("container.add_node[(]'([A-Z][a-zA-Z0-9]*)', " + "inputs"), + re.compile("scope, '([A-Z][a-zA-Z0-9]*)', \\[?input_name"), + re.compile("op_type = '([A-Z][a-zA-Z0-9]*)'")] + res = {} + for k, v in dict_apply_operation.items(): + if k.startswith("apply_") and callable(v): + found = None + source = inspect.getsource(v) + for reg in regs: + g = reg.search(source) + if g: + found = g.groups()[0] + break + if found is None: + continue + res[found] = v + return res + + +def _build_options(model, defined_options, default_values, + allowed_options, fail): + opts = {} if default_values is None else default_values + if defined_options is not None: + opts.update(defined_options.get(type(model), {})) + opts.update(defined_options.get(id(model), {})) + if allowed_options not in (None, 'passthrough'): + for k, v in opts.items(): + if k not in allowed_options: + if fail: + raise NameError( + "Option '{}' not in {} for class '{}'.".format( + k, list(sorted(allowed_options)), + model.__class__.__name__)) + return None + allowed = allowed_options[k] + if allowed is not None and v not in allowed and v is not None: + raise ValueError( + "Unexpected value [{!r}] for option '{}'" + " (it must be in {}) for model '{}'.".format( + v, k, allowed, model.__class__.__name__)) + elif fail and len(opts) != 0 and allowed_options != 'passthrough': + raise RuntimeError( + "Options {} are not registerd for model '{}'.".format( + list(sorted(opts)), model.__class__.__name__)) + return opts + + +_apply_operation_specific = _get_operation_list() + + +class _WhiteBlackContainer: + + def __init__(self, white_op=None, black_op=None): + self._white_op = white_op + self._black_op = black_op + + def is_allowed(self, node_type): + """ + Tells if a node is white listed or not black listed. + """ + if isinstance(node_type, (list, tuple, set)): + return all(map(self.is_allowed, node_type)) + try: + self.check_white_black_list(node_type) + return True + except RuntimeError: + return False + + def check_white_black_list(self, node_type): + """ + Checks a node type is allowed according to white + and black lists. + """ + if self._white_op: + if node_type not in self._white_op: + raise RuntimeError( + "Operator '{}' is not white listed.".format(node_type)) + if self._black_op: + if node_type in self._black_op: + raise RuntimeError( + "Operator '{}' is black listed.".format(node_type)) + + +class RawModelContainerNode(_WhiteBlackContainer): + """ + This node is the carrier of the model we want to convert. + It provides an abstract layer so that our parsing + framework can work with models generated by different tools. + """ + + def __init__(self, raw_model, white_op=None, black_op=None): + """ + :param raw_model: *scikit-learn* model to convert + """ + _WhiteBlackContainer.__init__( + self, white_op=white_op, black_op=black_op) + self._raw_model = raw_model + + @property + def raw_model(self): + return self._raw_model + + @property + def input_names(self): + """ + This function should return a list of strings. Each string + corresponds to an input variable name. + :return: a list of string + """ + raise NotImplementedError() + + @property + def output_names(self): + """ + This function should return a list of strings. Each string + corresponds to an output variable name. + :return: a list of string + """ + raise NotImplementedError() + + +class SklearnModelContainerNode(RawModelContainerNode): + """ + Main container for one *scikit-learn* model. + Every converter adds nodes to an existing container + which is converted into a *ONNX* graph by an instance of + :class:`Topology `. + """ + + def __init__(self, sklearn_model, white_op=None, black_op=None): + super(SklearnModelContainerNode, self).__init__( + sklearn_model, white_op=white_op, black_op=black_op) + # Scikit-learn models have no input and output specified, + # so we create them and store them in this container. + self._inputs = [] + self._outputs = [] + + @property + def input_names(self): + return [variable.onnx_name for variable in self._inputs] + + @property + def output_names(self): + return [variable.onnx_name for variable in self._outputs] + + def add_input(self, variable): + # The order of adding variables matters. The final model's + # input names are sequentially added as this list + if variable not in self._inputs: + self._inputs.append(variable) + + def add_output(self, variable): + # The order of adding variables matters. The final model's + # output names are sequentially added as this list + if variable not in self._outputs: + self._outputs.append(variable) + + +class ModelComponentContainer(ModelContainer, _WhiteBlackContainer): + """ + In the conversion phase, this class is used to collect all materials + required to build an *ONNX* *GraphProto*, which is encapsulated in a + *ONNX* *ModelProto*. + """ + + def __init__(self, target_opset, options=None, registered_models=None, + white_op=None, black_op=None): + """ + :param target_opset: number, for example, 7 for *ONNX 1.2*, and + 8 for *ONNX 1.3*. + :param options: see :ref:`l-conv-options` + :param registered_models: registered models + :param white_op: white list of ONNX nodes allowed + while converting a pipeline, if empty, all are allowed + :param black_op: black list of ONNX nodes allowed + while converting a pipeline, if empty, none are blacklisted + """ + _WhiteBlackContainer.__init__( + self, white_op=white_op, black_op=black_op) + # Inputs of ONNX graph. They are ValueInfoProto in ONNX. + self.inputs = [] + # Outputs of ONNX graph. They are ValueInfoProto in ONNX. + self.outputs = [] + # ONNX tensors (type: TensorProto). They are initializers of + # ONNX GraphProto. + self.initializers = [] + self.initializers_strings = {} + # Intermediate variables in ONNX computational graph. They are + # ValueInfoProto in ONNX. + self.value_info = [] + # ONNX nodes (type: NodeProto) used to define computation + # structure + self.nodes = [] + # ONNX operators' domain-version pair set. They will be added + # into opset_import field in the final ONNX model. + self.node_domain_version_pair_sets = set() + # The targeted ONNX operator set (referred to as opset) that + # matches the ONNX version. + if isinstance(target_opset, dict): + self.target_opset_all = target_opset + self.target_opset = target_opset.get('', None) + else: + self.target_opset = target_opset + self.target_opset_all = {'': target_opset} + # Additional options given to converters. + self.options = options + # All registered models. + self.registered_models = registered_models + + def __str__(self): + """ + Shows internal information. + """ + rows = [] + if self.inputs: + rows.append("INPUTS") + for inp in self.inputs: + rows.append( + " " + str(inp).replace(" ", "").replace("\n", " ")) + if self.outputs: + rows.append("OUTPUTS") + for out in self.outputs: + rows.append( + " " + str(out).replace(" ", "").replace("\n", " ")) + if self.initializers: + rows.append("INITIALIZERS") + for ini in self.initializers: + rows.append( + " " + str(ini).replace(" ", "").replace("\n", " ")) + if self.value_info: + rows.append("NODES") + for val in self.value_info: + rows.append( + " " + str(val).replace(" ", "").replace("\n", " ")) + if self.nodes: + rows.append("PROTO") + for nod in self.nodes: + rows.append( + " " + str(nod).replace(" ", "").replace("\n", " ")) + return "\n".join(rows) + + def _make_value_info(self, variable): + value_info = ValueInfoProto() + value_info.name = variable.full_name + value_info.type.CopyFrom(variable.type.to_onnx_type()) + if variable.type.doc_string: + value_info.doc_string = variable.type.doc_string + return value_info + + def add_input(self, variable): + """ + Adds our *Variable* object defined _parser.py into the the input + list of the final ONNX model. + + :param variable: The Variable object to be added + """ + self.inputs.append(self._make_value_info(variable)) + + def add_output(self, variable): + """ + Adds our *Variable* object defined *_parser.py* into the the + output list of the final ONNX model. + + :param variable: The Variable object to be added + """ + self.outputs.append(self._make_value_info(variable)) + + def add_options(self, model_id, options): + """ + Adds an option, for example, + ``add_options(id(clr), {'raw_scores': True})`` + tells the converter associated to ``clr`` to + use raw score instead of probabilities. + + :param model_id: class or ``id(instance)`` + :param options: dictionary with the new values + """ + if options is None: + return + if self.options is None: + self.options = {} + if model_id not in self.options: + self.options[model_id] = None + if self.options[model_id] is None: + self.options[model_id] = {} + self.options[model_id].update(options) + + def add_initializer(self, name, onnx_type, shape, content): + """ + Adds a *TensorProto* into the initializer list of the final + ONNX model. + + :param name: Variable name in the produced ONNX model. + :param onnx_type: Element types allowed in ONNX tensor, e.g., + TensorProto.FLOAT and TensorProto.STRING. + :param shape: Tensor shape, a list of integers. + :param content: Flattened tensor values (i.e., a float list + or a float array). + :return: created tensor + """ + sparse_tensor = None + tensor = None + + cached_value = None + if isinstance(content, TensorProto): + tensor = TensorProto() + tensor.data_type = content.data_type + tensor.name = name + tensor.raw_data = content.raw_data + tensor.dims.extend(content.dims) + elif shape is None and isinstance( + content, (np.float32, np.float64, np.int32, + np.int64, float, np.int8, np.uint8, + np.bool_)): + tensor = make_tensor(name, onnx_type, [], [content]) + elif (SparseTensorProto is not None and + isinstance(content, SparseTensorProto)): + raise NotImplementedError("Not implemented yet.") + elif shape is None: + tensor = make_attribute(name, content) + elif isinstance(content, coo_matrix): + if SparseTensorProto is None: + raise RuntimeError( + "Sparse matrices require SparseTensorProto. Update onnx.") + values_tensor = make_tensor( + name + "_v", data_type=onnx_type, + dims=(len(content.data), ), vals=content.data) + indices = [i * content.shape[1] + j + for i, j in zip(content.row, content.col)] + indices_tensor = make_tensor( + name=name + "_i", data_type=TensorProto.INT64, + dims=(len(indices), ), vals=indices) + dense_shape = list(content.shape) + sparse_tensor = make_sparse_tensor( + values_tensor, indices_tensor, dense_shape) + + # cached value: same without names + values_tensor = make_tensor( + "_v", data_type=onnx_type, + dims=(len(content.data), ), vals=content.data) + indices_tensor = make_tensor( + name="_i", data_type=TensorProto.INT64, + dims=(len(indices), ), vals=indices) + cached_value = make_sparse_tensor( + values_tensor, indices_tensor, dense_shape) + + else: + if any(d is None for d in shape): + raise ValueError('Shape of initializer cannot contain None.') + if (hasattr(content, 'dtype') and + content.dtype in (bool, np.bool_)): + content = content.astype(np.int32) + try: + tensor = make_tensor(name, onnx_type, shape, content) + except TypeError as e: + raise TypeError( + "Unable to make a tensor name=%r " + "onnx_type=%r shape=%r content-type=%r." % ( + name, onnx_type, shape, type(content))) from e + + if tensor is not None: + if cached_value is None: + name = tensor.name + tensor.name = "tensor" + content = tensor.SerializeToString() + tensor.name = name + else: + content = cached_value.SerializeToString() + cached_name = self.initializers_strings.get(content, None) + if cached_name is None: + self.initializers_strings[content] = name + self.initializers.append(tensor) + return tensor + + self.add_node( + 'Identity', cached_name, name, op_version=self.target_opset, + name=name + '_op') + return name + + if sparse_tensor is not None: + content = cached_value.SerializeToString() + cached_name = self.initializers_strings.get(content, None) + if cached_name is None: + self.initializers_strings[content] = name + self.add_node( + 'Constant', [], [name], sparse_value=sparse_tensor, + op_version=self.target_opset, name=name + '_op') + return sparse_tensor + + self.add_node( + 'Identity', cached_name, name, op_version=self.target_opset, + name=name + '_op') + return name + + raise RuntimeError( + "Either tensor or sparse_tensor should be defined.") + + def add_value_info(self, variable): + self.value_info.append(self._make_value_info(variable)) + + def _check_operator(self, op_type): + """ + Checks that if *op_type* is one of the operators defined in + :mod:`skl2onnx.common._apply_container`, then it was called + from a function defined in this submodule by looking + into the callstack. The test is enabled for *python >= 3.6*. + """ + if (op_type in _apply_operation_specific and + sys.version_info[:2] >= (3, 6)): + tb = traceback.extract_stack() + operation = [] + fct = _apply_operation_specific[op_type] + skl2 = False + for b in tb: + if "_apply_operation" in b.filename and b.name == fct.__name__: + operation.append(b) + if not skl2 and "skl2onnx" in b.filename: + skl2 = True + if skl2 and len(operation) == 0: + raise RuntimeError( + "Operator '{0}' should be added with function " + "'{1}' in submodule _apply_operation.".format( + op_type, fct.__name__)) + self.check_white_black_list(op_type) + + def add_node(self, op_type, inputs, outputs, op_domain='', op_version=None, + name=None, **attrs): + """ + Adds a *NodeProto* into the node list of the final ONNX model. + If the input operator's domain-version information cannot be + found in our domain-version pool (a Python set), we may add it. + + :param op_type: A string (e.g., Pool and Conv) indicating the + type of the NodeProto + :param inputs: A list of strings. They are the input variables' + names of the considered NodeProto + :param outputs: A list of strings. They are the output + variables' names of the considered NodeProto + :param op_domain: The domain name (e.g., ai.onnx.ml) of the + operator we are trying to add. + :param op_version: The version number (e.g., 0 and 1) of the + operator we are trying to add. + :param name: name of the node, this name cannot be empty + :param attrs: A Python dictionary. Keys and values are + attributes' names and attributes' values, + respectively. + """ + if name is None or not isinstance( + name, str) or name == '': + name = "N%d" % len(self.nodes) + existing_names = set(n.name for n in self.nodes) + if name in existing_names: + name += "-N%d" % len(self.nodes) + + if op_domain is None: + op_domain = get_domain() + self._check_operator(op_type) + if op_version is None: + op_version = self._get_op_version(op_domain, op_type) + + if isinstance(inputs, str): + inputs = [inputs] + if isinstance(outputs, str): + outputs = [outputs] + try: + common = set(inputs) & set(outputs) + except TypeError as e: + raise TypeError( + "inputs or outputs are wrong, inputs=%r, outputs=%r, node=%r." + "" % (inputs, outputs, op_type)) from e + if common: + raise RuntimeError( + "inputs and outputs cannot have " + "variables in common {} in node '{}' " + "with name '{}'.".format(common, op_type, name)) + if not isinstance(inputs, list) or not all( + isinstance(s, str) for s in inputs): + type_list = ','.join(list(str(type(s)) for s in inputs)) + raise ValueError('Inputs must be a list of string but get [%s]' + % type_list) + if (not isinstance(outputs, list) or + not all(isinstance(s, str) for s in outputs)): + type_list = ','.join(list(str(type(s)) for s in outputs)) + raise ValueError('Outputs must be a list of string but get [%s]' + % type_list) + upd = {} + dtypes = set() + for k, v in attrs.items(): + if v is None: + raise ValueError( + 'Failed to create ONNX node. Undefined ' + 'attribute pair (%s, %s) found for type %r and ' + 'version %r' % ( + k, v, op_type, op_version)) + if isinstance(v, np.ndarray): + upd[k] = v + dtypes.add(v.dtype) + + if upd: + attrs.update(upd) + if 'dtype' in attrs: + raise RuntimeError("dtype should not be a parameter.") + if len(dtypes) == 0: + dtype = None + elif len(dtypes) == 1: + dtype = list(dtypes)[0] + elif (np.float32 in dtypes and np.float64 in dtypes): + raise RuntimeError( + "Unable to select a dtype among {}.".format(dtypes)) + else: + dtype = None + try: + node = make_node(op_type, inputs, outputs, name=name, + _dtype=dtype, **attrs) + except ValueError as e: + raise ValueError("Unable to create node '{}' with name='{}'." + "".format(op_type, name)) from e + node.domain = op_domain + + self.node_domain_version_pair_sets.add((op_domain, op_version)) + self.nodes.append(node) + if (self.target_opset is not None and + op_version is not None and + op_version > self.target_opset_any_domain(op_domain)): + raise RuntimeError( + "Opset number {} is higher than targeted opsets {} for " + "node '{}' (domain: '{}').".format( + op_version, self.target_opset_all, + node.op_type, op_domain)) + + def target_opset_any_domain(self, domain): + target_opset = self.target_opset_all + if isinstance(target_opset, dict): + if domain in target_opset: + to = target_opset[domain] + else: + to = None + if to is None and domain == '': + to = onnx_opset_version() + if to is None: + smap = C.schema_version_map() + if domain in smap: + to = smap[domain][1] + if to is not None: + return to + # The domain is not registered in onnx, it is probably + # a custom domain. We assume the version is one. + return 1 + return self.target_opset + + @property + def target_opset_onnx(self): + return self.target_opset_any_domain('') + + def _get_op_version(self, domain, op_type): + """ + Determines the highest version of operator + *op_type* below or equal to *target_opset*. + """ + if not hasattr(self, '_op_versions'): + self._build_op_version() + key = domain, op_type + vers = self._op_versions.get(key, None) + if vers is None: + warnings.warn( + "Unable to find operator '{}' in domain '{}' in ONNX, " + "op_version is forced to 1.".format( + op_type, domain)) + vers = [1] + highest = self.target_opset_any_domain(domain) + pos = len(vers) - 1 + while pos >= 0: + if vers[pos] <= highest: + return vers[pos] + pos -= 1 + raise RuntimeError( + "Unable to find a suitable version for operator '{}' " + "in domain '{}'. Available versions: {}.".format( + op_type, domain, vers)) + + def _build_op_version(self): + res = {} + for schema in get_all_schemas_with_history(): + dom = schema.domain + name = schema.name + vers = schema.since_version + if (dom, name) not in res: + res[dom, name] = set() + res[dom, name].add(vers) + self._op_versions = {} + for k, v in res.items(): + self._op_versions[k] = list(sorted(v)) + + def _get_allowed_options(self, model): + if self.registered_models is not None: + if inspect.isfunction(model): + if model not in self.registered_models['aliases']: + return None + alias = self.registered_models['aliases'][model] + elif hasattr(model, 'alias'): + alias = model.alias + else: + if type(model) not in self.registered_models['aliases']: + return {} + alias = self.registered_models['aliases'][type(model)] + conv = self.registered_models['conv'][alias] + allowed = conv.get_allowed_options() + if allowed is None: + return {} + return allowed + clname = (str(model) if inspect.isfunction(model) + else model.__class__.__name__) + raise NotImplementedError( + "No registered models, no known allowed options " + "for model '{}'.".format(clname)) + + def validate_options(self, operator): + """ + Validates every operator allows the options + given by the user at converter time + for an operator. + """ + skl_op = operator.raw_operator + self.get_options(skl_op) + + def get_options(self, model, default_values=None, fail=True): + """ + Returns additional options for a model. + It first looks by class then by id (``id(model)``). + :param model: model being converted + :param default_values: default options (it is modified by + the function) + :param fail: fails if options not found + :return: dictionary + """ + return _build_options( + model, self.options, default_values, + self._get_allowed_options(model), fail=fail) + + def has_options(self, model, option_name): + """ + Tells if a model allows one specific options. + + :param model: model being converted + :return: boolean + """ + opts = self._get_allowed_options(model) + return option_name in opts + + def ensure_topological_order(self): + """ + Ensures and modifies the order of nodes to have + a topological order (every node in the list + can only be an input for a node later in this list). + The function raises an exception if a cycle is detected. + """ + order = {} + for inp in self.inputs: + name = inp.name + order[name] = 0 + for inp in self.initializers: + name = inp.name + order[name] = 0 + n_iter = 0 + while n_iter < len(self.nodes) * 2: + n_iter += 1 + missing_names = set() + missing_ops = [] + for node in self.nodes: + maxi = 0 + for name in node.input: + if name in order: + maxi = max(maxi, order[name]) + else: + maxi = None + missing_names.add(name) + break + if maxi is None: + missing_ops.append(node) + continue + key = id(node) + if key in order: + continue + maxi += 1 + order[key] = maxi + maxi += 1 + for name in node.output: + if name in order: + raise RuntimeError( + "Unable to sort a node (cycle). An output was " + "already ordered %r (iteration=%r)." % ( + name, n_iter)) + order[name] = maxi + if len(missing_names) == 0: + continue + + if len(missing_ops) > 0: + def nstr(name): + if name in order: + return "%s#%d" % (name, order[name]) + return name + rows = ["%s(%s) -> [%s]" % ( + n.name or n.op_type, + ', '.join(map(nstr, n.input)), + ', '.join(n.output)) + for n in missing_ops] + rows.insert(0, "") + rows.append("--") + rows.append("--all-nodes--") + rows.append("--") + rows.extend("%s(%s) -> [%s]" % ( + n.name or n.op_type, + ', '.join(map(nstr, n.input)), + ', '.join(n.output)) + for n in self.nodes) + raise RuntimeError( + "After %d iterations for %d nodes, still unable " + "to sort names %r. The graph may be disconnected. " + "List of operators: %s" % ( + n_iter, len(self.nodes), missing_names, + "\n".join(rows))) + + # Update order + topo = [(order[id(node)], str(id(node))) for node in self.nodes] + topo.sort() + map_nodes = {str(id(node)): node for node in self.nodes} + self.nodes = [map_nodes[_[1]] for _ in topo]