Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement option split to reduce discrepancies for lightgbm regressors #496

Merged
merged 18 commits into from
Sep 21, 2021
Merged
1 change: 0 additions & 1 deletion .azure-pipelines/linux-CI-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ jobs:
python -m pip install $(ONNX_PATH)
python -m pip install hummingbird-ml --no-deps
python -m pip install -r requirements.txt
pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
python -m pip install -r requirements-dev.txt
python -m pip install $(ORT_PATH)
python -m pip install pytest
Expand Down
91 changes: 72 additions & 19 deletions .azure-pipelines/linux-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ trigger:
jobs:

- job: 'Test'
timeoutInMinutes: 25
pool:
vmImage: 'ubuntu-latest'
strategy:
Expand Down Expand Up @@ -70,31 +71,28 @@ jobs:
maxParallel: 3

steps:
- script: sudo install -d -m 0777 /home/vsts/.conda/envs
displayName: Fix Conda permissions

- task: CondaEnvironment@1
- task: UsePythonVersion@0
inputs:
createCustomEnvironment: true
environmentName: 'py$(python.version)'
packageSpecs: 'python=$(python.version)'
versionSpec: '$(python.version)'
architecture: 'x64'

- script: |
python -m pip install --upgrade pip
conda config --set always_yes yes --set changeps1 no
conda install -c conda-forge protobuf
conda install -c conda-forge numpy
conda install -c conda-forge cmake
pip install $(COREML_PATH)
pip install $(ONNX_PATH)
pip install hummingbird-ml --no-deps
pip install $(ONNX_PATH) $(ONNXRT_PATH) cython
pip install -r requirements.txt
pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
displayName: 'Install dependencies'

- script: |
pip install -r requirements-dev.txt
displayName: 'Install dependencies-dev'

- script: |
python -m pip install --upgrade pip
pip install xgboost$(xgboost.version)
pip install $(ONNX_PATH)
pip install $(ONNXRT_PATH)
pip install pytest
displayName: 'Install dependencies'
pip install $(COREML_PATH)
displayName: 'Install xgboost, onnxruntime'

- script: |
pip install flake8
Expand All @@ -109,8 +107,63 @@ jobs:
export PYTHONPATH=.
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
python -c "import onnxruntime;print(onnxruntime.__version__)"
pytest tests --doctest-modules --junitxml=junit/test-results.xml
displayName: 'pytest - onnxmltools'
displayName: 'version'

- script: |
export PYTHONPATH=.
pytest tests/baseline --durations=0
displayName: 'pytest - baseline'

- script: |
export PYTHONPATH=.
pytest tests/catboost --durations=0
displayName: 'pytest - catboost'

- script: |
export PYTHONPATH=.
pytest tests/coreml --durations=0
displayName: 'pytest - coreml'

- script: |
export PYTHONPATH=.
pytest tests/lightgbm --durations=0
displayName: 'pytest - lightgbm'

- script: |
export PYTHONPATH=.
pytest tests/sparkml --durations=0
displayName: 'pytest - sparkml'

- script: |
export PYTHONPATH=.
pytest tests/svmlib --durations=0
displayName: 'pytest - svmlib'

- script: |
export PYTHONPATH=.
pytest tests/utils --durations=0
displayName: 'pytest - utils'

- script: |
export PYTHONPATH=.
pytest tests/xgboost --durations=0
displayName: 'pytest - xgboost'

- script: |
export PYTHONPATH=.
pip install h2o
pytest tests/h2o --durations=0
displayName: 'pytest - h2o'

- script: |
pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
pip install hummingbird-ml --no-deps
displayName: 'Install hummingbird-ml'

- script: |
export PYTHONPATH=.
pytest tests/hummingbirdml --durations=0
displayName: 'pytest - hummingbirdml'

- task: PublishTestResults@2
inputs:
Expand Down
1 change: 0 additions & 1 deletion .azure-pipelines/win32-CI-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ jobs:
pip install %COREML_PATH% %ONNX_PATH%
pip install humming-bird-ml --no-deps
pip install -r requirements.txt
python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
pip install -r requirements-dev.txt
pip install %ONNXRT_PATH%
displayName: 'Install dependencies'
Expand Down
90 changes: 65 additions & 25 deletions .azure-pipelines/win32-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ trigger:
jobs:

- job: 'Test'
timeoutInMinutes: 30
pool:
vmImage: 'windows-latest'
strategy:
Expand All @@ -18,79 +19,63 @@ jobs:
ONNX_PATH: 'onnx==1.10.1' # '-i https://test.pypi.org/simple/ onnx==1.9.101'
ONNXRT_PATH: onnxruntime==1.8.1
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python39-190-RT181:
python.version: '3.9'
ONNX_PATH: 'onnx==1.9.0'
ONNXRT_PATH: onnxruntime==1.8.1
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python39-190-RT180:
python.version: '3.9'
ONNX_PATH: onnx==1.9.0
ONNXRT_PATH: onnxruntime==1.8.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python38-181-RT170:
python.version: '3.8'
ONNX_PATH: onnx==1.8.1
ONNXRT_PATH: onnxruntime==1.7.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python37-180-RT160:
python.version: '3.7'
ONNX_PATH: onnx==1.8.0
ONNXRT_PATH: onnxruntime==1.6.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python37-160-RT111:
python.version: '3.7'
ONNX_PATH: onnx==1.6.0
ONNXRT_PATH: onnxruntime==1.1.1
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

Python37-170-RT130:
python.version: '3.7'
ONNX_PATH: onnx==1.7.0
ONNXRT_PATH: onnxruntime==1.3.0
COREML_PATH: git+https://github.com/apple/coremltools@3.1
sklearn.version: ''

maxParallel: 3

steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
architecture: 'x64'

- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
displayName: Add conda to PATH

- script: conda create --yes --quiet --name py$(python.version) -c conda-forge python=$(python.version) numpy protobuf
- script: conda create --yes --quiet --name py$(python.version) -c conda-forge python=$(python.version) numpy protobuf scikit-learn scipy cython
displayName: Create Anaconda environment

- script: |
call activate py$(python.version)
python -m pip install --upgrade pip numpy
echo Test numpy installation... && python -c "import numpy"
python -m pip install scikit-learn
python -m pip install %ONNX_PATH%
python -m pip install humming-bird-ml --no-deps
python -m pip install -r requirements.txt
python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
displayName: 'Install dependencies (1)'

- script: |
call activate py$(python.version)
python -m pip install -r requirements-dev.txt
displayName: 'Install dependencies (2)'
displayName: 'Install dependencies-dev'

- script: |
call activate py$(python.version)
Expand All @@ -99,14 +84,10 @@ jobs:

- script: |
call activate py$(python.version)
python -m pip install %ONNX_PATH%
python -m pip install %ONNXRT_PATH%
displayName: 'Install onnxruntime'

- script: |
call activate py$(python.version)
python -m pip install scikit-learn$(sklearn.version)
displayName: 'Install scikit-learn'

- script: |
call activate py$(python.version)
python -m flake8 ./onnxmltools
Expand All @@ -118,8 +99,67 @@ jobs:
export PYTHONPATH=.
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
python -c "import onnxruntime;print(onnxruntime.__version__)"
python -m pytest tests --doctest-modules --junitxml=junit/test-results.xml
displayName: 'pytest - onnxmltools'
displayName: 'version'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/baseline --durations=0
displayName: 'pytest baseline'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/catboost --durations=0
displayName: 'pytest catboost'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/coreml --durations=0
displayName: 'pytest coreml'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/lightgbm --durations=0
displayName: 'pytest lightgbm'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/sparkml --durations=0
displayName: 'pytest sparkml'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/svmlib --durations=0
displayName: 'pytest svmlib'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/utils --durations=0
displayName: 'pytest utils'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/xgboost --durations=0
displayName: 'pytest xgboost'

- script: |
call activate py$(python.version)
python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
python -m pip install hummingbird-ml --no-deps
displayName: 'Install hummingbird-ml'

- script: |
call activate py$(python.version)
export PYTHONPATH=.
python -m pytest tests/hummingbirdml --durations=0
displayName: 'pytest hummingbirdml'

- task: PublishTestResults@2
inputs:
Expand Down
15 changes: 10 additions & 5 deletions onnxmltools/convert/lightgbm/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,20 @@ def _get_lightgbm_operator_name(model):
return lightgbm_operator_name_map[model_type]


def _parse_lightgbm_simple_model(scope, model, inputs):
def _parse_lightgbm_simple_model(scope, model, inputs, split=None):
'''
This function handles all non-pipeline models.

:param scope: Scope object
:param model: A lightgbm object
:param inputs: A list of variables
:param split: split TreeEnsembleRegressor into multiple nodes to reduce
discrepancies
:return: A list of output variables which will be passed to next stage
'''
operator_name = _get_lightgbm_operator_name(model)
this_operator = scope.declare_local_operator(operator_name, model)
this_operator.split = split
this_operator.inputs = inputs

if operator_name == 'LgbmClassifier':
Expand Down Expand Up @@ -151,27 +154,29 @@ def _parse_sklearn_classifier(scope, model, inputs, zipmap=True):
return this_operator.outputs


def _parse_lightgbm(scope, model, inputs, zipmap=True):
def _parse_lightgbm(scope, model, inputs, zipmap=True, split=None):
'''
This is a delegate function. It does nothing but invoke the correct parsing function according to the input
model's type.
:param scope: Scope object
:param model: A lightgbm object
:param inputs: A list of variables
:param zipmap: add operator ZipMap after operator TreeEnsembleClassifier
:param split: split TreeEnsembleRegressor into multiple nodes to reduce
discrepancies
:return: The output variables produced by the input model
'''
if isinstance(model, LGBMClassifier):
return _parse_sklearn_classifier(scope, model, inputs, zipmap=zipmap)
if (isinstance(model, WrappedBooster) and
model.operator_name == 'LgbmClassifier'):
return _parse_sklearn_classifier(scope, model, inputs, zipmap=zipmap)
return _parse_lightgbm_simple_model(scope, model, inputs)
return _parse_lightgbm_simple_model(scope, model, inputs, split=split)


def parse_lightgbm(model, initial_types=None, target_opset=None,
custom_conversion_functions=None, custom_shape_calculators=None,
zipmap=True):
zipmap=True, split=None):
raw_model_container = LightGbmModelContainer(model)
topology = Topology(raw_model_container, default_batch_size='None',
initial_types=initial_types, target_opset=target_opset,
Expand All @@ -186,7 +191,7 @@ def parse_lightgbm(model, initial_types=None, target_opset=None,
for variable in inputs:
raw_model_container.add_input(variable)

outputs = _parse_lightgbm(scope, model, inputs, zipmap=zipmap)
outputs = _parse_lightgbm(scope, model, inputs, zipmap=zipmap, split=split)

for variable in outputs:
raw_model_container.add_output(variable)
Expand Down
Loading