Skip to content

Commit

Permalink
Support gamma objective in LGBMRegressor (#484)
Browse files Browse the repository at this point in the history
* support objective=gamma in lgbm regressor
* add unit test for objective functions
* add test case for objective='regression'
* set fraction of rows that need to be almost equal to 0.9999
* skip objective test if onnxmltools version is lower than 1.3
Signed-off-by: Jan-Benedikt Jagusch <jan.jagusch@gmail.com>
  • Loading branch information
janjagusch authored Aug 12, 2021
1 parent 0795ad0 commit b5dc2f6
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 3 deletions.
3 changes: 1 addition & 2 deletions onnxmltools/convert/lightgbm/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ def __init__(self, booster):
if (_model_dict['objective'].startswith('binary') or
_model_dict['objective'].startswith('multiclass')):
self.operator_name = 'LgbmClassifier'
elif (_model_dict['objective'].startswith('regression') or
_model_dict['objective'].startswith('poisson')):
elif _model_dict['objective'].startswith(('regression', 'poisson', 'gamma')):
self.operator_name = 'LgbmRegressor'
else:
# Other objectives are not supported.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def convert_lightgbm(scope, operator, container):
n_classes = 1 # Regressor has only one output variable
attrs['post_transform'] = 'NONE'
attrs['n_targets'] = n_classes
elif gbm_text['objective'].startswith('poisson'):
elif gbm_text['objective'].startswith(('poisson', 'gamma')):
n_classes = 1 # Regressor has only one output variable
attrs['n_targets'] = n_classes
# 'Exp' is not a supported post_transform value in the ONNX spec yet,
Expand Down
89 changes: 89 additions & 0 deletions tests/lightgbm/test_objective_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import unittest
from typing import Dict, List, Tuple

import numpy as np
import onnxruntime
import pandas as pd
from onnx import ModelProto
from onnxconverter_common.data_types import DoubleTensorType, TensorType
from onnxmltools import convert_lightgbm
from onnxruntime import InferenceSession
from pandas.core.frame import DataFrame

from lightgbm import LGBMRegressor

_N_ROWS=10_000
_N_COLS=10
_N_DECIMALS=5
_FRAC = 0.9999

_X = pd.DataFrame(np.random.random(size=(_N_ROWS, _N_COLS)))
_Y = pd.Series(np.random.random(size=_N_ROWS))

_DTYPE_MAP: Dict[str, TensorType] = {
"float64": DoubleTensorType,
}


class ObjectiveTest(unittest.TestCase):

_objectives: Tuple[str] = (
"regression",
"poisson",
"gamma",
)

@staticmethod
def _calc_initial_types(X: DataFrame) -> List[Tuple[str, TensorType]]:
dtypes = set(str(dtype) for dtype in X.dtypes)
if len(dtypes) > 1:
raise RuntimeError(f"Test expects homogenous input matrix. Found multiple dtypes: {dtypes}.")
dtype = dtypes.pop()
tensor_type = _DTYPE_MAP[dtype]
return [("input", tensor_type(X.shape))]

@staticmethod
def _predict_with_onnx(model: ModelProto, X: DataFrame) -> np.array:
session = InferenceSession(model.SerializeToString())
output_names = [s_output.name for s_output in session.get_outputs()]
input_names = [s_input.name for s_input in session.get_inputs()]
if len(input_names) > 1:
raise RuntimeError(f"Test expects one input. Found multiple inputs: {input_names}.")
input_name = input_names[0]
return session.run(output_names, {input_name: X.values})[0][:, 0]

@staticmethod
def _assert_almost_equal(actual: np.array, desired: np.array, decimal: int=7, frac: float=1.0):
"""
Assert that almost all rows in actual and desired are almost equal to each other.
Similar to np.testing.assert_almost_equal but allows to define a fraction of rows to be almost
equal instead of expecting all rows to be almost equal.
"""
assert 0 <= frac <= 1, "frac must be in range(0, 1)."
success_abs = (abs(actual - desired) <= (10 ** -decimal)).sum()
success_rel = success_abs / len(actual)
assert success_rel >= frac, f"Only {success_abs} out of {len(actual)} rows are almost equal to {decimal} decimals."

@unittest.skipIf(tuple(int(ver) for ver in onnxruntime.__version__.split(".")) < (1, 3), "not supported in this library version")
def test_objective(self):
"""
Test if a LGBMRegressor a with certain objective (e.g. 'poisson') can be converted to ONNX
and whether the ONNX graph and the original model produce almost equal predictions.
Note that this tests is a bit flaky because of precision differences with ONNX and LightGBM
and therefore sometimes fails randomly. In these cases, a retry should resolve the issue.
"""
for objective in self._objectives:
with self.subTest(X=_X, objective=objective):
regressor = LGBMRegressor(objective=objective)
regressor.fit(_X, _Y)
regressor_onnx: ModelProto = convert_lightgbm(regressor, initial_types=self._calc_initial_types(_X))
y_pred = regressor.predict(_X)
y_pred_onnx = self._predict_with_onnx(regressor_onnx, _X)
self._assert_almost_equal(
y_pred,
y_pred_onnx,
decimal=_N_DECIMALS,
frac=_FRAC,
)

0 comments on commit b5dc2f6

Please sign in to comment.