## Describe your changes

- Move the onnxruntime import for calibrate into Vitis AI pass execution to fix the onnxruntime 1.16 compatibility issue with OpenVINO (a sketch of the pattern follows below).
- Pin the ResNet example's ORT version until microsoft/onnxruntime#17619 is fixed.
- Disable the Vitis AI test for ORT 1.16, since the calibrator API changed.
- Pin onnxruntime-extensions to 0.8.0, since `check_model` fails with the latest version.
- Copy the `quant_pre_process` code into Olive to work around the Windows temp folder cleanup permission error (microsoft/onnxruntime#17627).

## Checklist before requesting a review

- [ ] Add unit tests for this change.
- [x] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [x] Format your code by running `pre-commit run --all-files`
- [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes.

## (Optional) Issue link
1 parent 38bd1d6 · commit 915252c · 7 changed files with 191 additions and 8 deletions
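The import move in the first bullet is the standard deferred-import pattern: the onnxruntime import is pulled out of module scope and into the function that actually needs it, so merely importing the pass no longer loads ORT next to OpenVINO. A minimal sketch, with hypothetical names (`quantize_with_vitis` is illustrative, not the function touched by this commit):

```python
def quantize_with_vitis(model_path: str) -> None:
    """Hypothetical pass body; only the import placement matters here."""
    # Deferred import: onnxruntime.quantization is loaded only when the pass
    # executes, not when the module is imported alongside openvino.
    from onnxruntime.quantization.calibrate import create_calibrator  # noqa: F401

    ...
```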
One of the pinned requirements files (the ResNet example's, per the commit message):

@@ -1,7 +1,7 @@
 azure-ai-ml
 azure-identity
 azureml-fsspec
-onnxruntime
+onnxruntime<=1.15.1
 pytorch-lightning
 scipy
 tabulate
And the requirements file that pins onnxruntime-extensions:

@@ -1,7 +1,7 @@
 neural-compressor
 onnx==1.14.0
 onnxruntime>=1.15.0
-onnxruntime-extensions>=0.8.0
+onnxruntime-extensions==0.8.0
 tabulate
 torch>=1.13.1
 transformers>=4.23.1
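The commit message also mentions disabling the Vitis AI test on ORT 1.16. A version gate along these lines would do it; this is a sketch assuming pytest and the `packaging` library, not the commit's actual test code:

```python
import onnxruntime as ort
import pytest
from packaging import version


# Skip on ORT 1.16+, where the quantization calibrator API changed.
@pytest.mark.skipif(
    version.parse(ort.__version__) >= version.parse("1.16.0"),
    reason="calibrator API changed in onnxruntime 1.16",
)
def test_vitis_ai_quantization():
    ...
```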
The new file: Olive's copy of ORT's quant_pre_process helper.

@@ -0,0 +1,164 @@
# --------------------------------------------------------------------------
# Copyright (c) Microsoft, Intel Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------

import logging
import shutil
import tempfile
import traceback
from contextlib import contextmanager
from pathlib import Path
from typing import Optional

import onnx
import onnxruntime
from onnxruntime.quantization.quant_utils import add_pre_process_metadata
from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference

logger = logging.getLogger(__name__)


def quant_pre_process(
    input_model_path: str,
    output_model_path: str,
    skip_optimization: bool = False,
    skip_onnx_shape: bool = False,
    skip_symbolic_shape: bool = False,
    auto_merge: bool = False,
    int_max: int = 2**31 - 1,
    guess_output_rank: bool = False,
    verbose: int = 0,
    save_as_external_data: bool = False,
    all_tensors_to_one_file: bool = False,
    external_data_location: Optional[str] = None,
    external_data_size_threshold: int = 1024,
) -> None:
    """Shape inference and model optimization, in preparation for quantization.

    Args:
        input_model_path: Path to the input model file
        output_model_path: Path to the output model file
        skip_optimization: Skip the model optimization step if true. This may result in ONNX
            shape inference failure for some models.
        skip_onnx_shape: Skip ONNX shape inference. Symbolic shape inference is most effective
            with transformer-based models. Skipping all shape inferences may reduce the
            effectiveness of quantization, as a tensor with an unknown shape cannot be
            quantized.
        skip_symbolic_shape: Skip symbolic shape inference. Symbolic shape inference is most
            effective with transformer-based models. Skipping all shape inferences may
            reduce the effectiveness of quantization, as a tensor with an unknown shape
            cannot be quantized.
        auto_merge: For symbolic shape inference, automatically merge symbolic dims when a
            conflict happens.
        int_max: For symbolic shape inference, the maximum value for an integer to be treated
            as boundless for ops like slice.
        guess_output_rank: Guess output rank to be the same as input 0 for unknown ops.
        verbose: Logs detailed info of inference. 0: turn off, 1: warnings, 3: detailed.
        save_as_external_data: Save the ONNX model with external data.
        all_tensors_to_one_file: Save all the external data to one file.
        external_data_location: The file location to save the external file.
        external_data_size_threshold: The size threshold for external data.
    """
    with TemporaryDirectory(prefix="pre.quant.") as quant_tmp_dir:
        temp_path = Path(quant_tmp_dir)
        model = None

        if not skip_symbolic_shape:
            logger.info("Performing symbolic shape inference...")
            model = SymbolicShapeInference.infer_shapes(
                onnx.load(input_model_path),
                int_max,
                auto_merge,
                guess_output_rank,
                verbose,
            )

        if not skip_optimization:
            # Use ORT optimizers (native code) to optimize the model
            if not skip_symbolic_shape:
                # Need to save the inferred model to file so as to run the optimizer
                input_model_path = str(temp_path / "symbolic_shape_inferred.onnx")
                if save_as_external_data:
                    onnx.save_model(
                        model,
                        input_model_path,
                        save_as_external_data=True,
                        all_tensors_to_one_file=all_tensors_to_one_file,
                        size_threshold=external_data_size_threshold,
                        convert_attribute=False,
                    )
                else:
                    onnx.save(model, input_model_path)
                model = None

            opt_model_path = str(temp_path / "optimized.onnx")
            try:
                sess_option = onnxruntime.SessionOptions()
                sess_option.optimized_model_filepath = opt_model_path
                sess_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
                _ = onnxruntime.InferenceSession(input_model_path, sess_option, providers=["CPUExecutionProvider"])
            except Exception:
                logger.error("ONNX Runtime model optimization failed! Consider rerunning with `--skip_optimization`.")
                logger.error(traceback.format_exc())

            input_model_path = opt_model_path

        if not skip_onnx_shape:
            # ONNX shape inference.
            # According to the docs, infer_shapes_path should be used for 2GB+ models.
            # If skip_optimization is specified, we could be dealing with a large
            # model, so to be on the safe side, save the model to file first.
            if model is not None:
                input_model_path = str(temp_path / "symbolic_shape_inferred.onnx")
                if save_as_external_data:
                    onnx.save_model(
                        model,
                        input_model_path,
                        save_as_external_data=True,
                        all_tensors_to_one_file=all_tensors_to_one_file,
                        size_threshold=external_data_size_threshold,
                        convert_attribute=False,
                    )
                else:
                    onnx.save(model, input_model_path)
                model = None

            inferred_model_path = str(temp_path / "onnx_shape_inferred.onnx")
            onnx.shape_inference.infer_shapes_path(input_model_path, inferred_model_path)
            model = onnx.load(inferred_model_path)

        if model is None:
            model = onnx.load(input_model_path)

        add_pre_process_metadata(model)

        if save_as_external_data:
            onnx.save_model(
                model,
                output_model_path,
                save_as_external_data=True,
                all_tensors_to_one_file=all_tensors_to_one_file,
                location=external_data_location,
                size_threshold=external_data_size_threshold,
                convert_attribute=False,
            )
        else:
            onnx.save(model, output_model_path)


@contextmanager
def TemporaryDirectory(**kwargs):
    # TODO: this is a workaround for https://github.com/microsoft/onnxruntime/issues/17627
    # on Windows: log and continue if the temp folder cannot be removed.
    name = tempfile.mkdtemp(**kwargs)
    try:
        yield name
    finally:
        try:
            shutil.rmtree(name)
        except OSError:
            logger.warning(f"Failed to remove: {name}", exc_info=True)
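Unlike `tempfile.TemporaryDirectory`, the context manager above logs a cleanup failure instead of raising, which is the actual workaround for microsoft/onnxruntime#17627: the reported failure mode is a permission error when cleaning the temp folder on Windows. For reference, a minimal call into the copied helper might look like this (paths are illustrative):

```python
# Hypothetical usage of the helper defined above; all other
# parameters keep their defaults.
quant_pre_process(
    input_model_path="model.onnx",        # illustrative input path
    output_model_path="model.prep.onnx",  # illustrative output path
)
```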