From a6c7be71b91703197fb1df72be85fc502b64f5f4 Mon Sep 17 00:00:00 2001
From: Jeremy Fowers <jeremy.fowers@amd.com>
Date: Mon, 4 Dec 2023 11:26:15 -0500
Subject: [PATCH 1/2] Remove the deprecated quantization tool

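Remove the post-training quantization path: the QuantizeONNXModel stage,
the pytorch_with_quantization sequence, the quantized_onnx_file() helper,
and build/quantization_helpers.py. The quantization samples arguments are
dropped from build_model(), load_or_make_state(), and model_intake(), and
the quantization_samples field is removed from build.State and from the
saved state dict. Inputs are now always downcast when saved.

Also includes formatting-only changes (comment spacing and line wrapping)
in common/build.py.

Signed-off-by: Jeremy Fowers <jeremy.fowers@amd.com>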
---
 docs/coverage.md                            |  1 -
 src/turnkeyml/build/export.py               | 63 +----------------
 src/turnkeyml/build/ignition.py             | 64 ++---------------
 src/turnkeyml/build/quantization_helpers.py | 78 ---------------------
 src/turnkeyml/build/sequences.py            | 12 ----
 src/turnkeyml/build_api.py                  | 10 ---
 src/turnkeyml/common/build.py               | 33 +++------
 7 files changed, 17 insertions(+), 244 deletions(-)
 delete mode 100644 src/turnkeyml/build/quantization_helpers.py

diff --git a/docs/coverage.md b/docs/coverage.md
index 9cf1fc3..3bf9b89 100644
--- a/docs/coverage.md
+++ b/docs/coverage.md
@@ -41,7 +41,6 @@ Name                                      Stmts   Miss Branch BrPart  Cover   Mi
 --------------------------------------------------------------------------------------------------------
 \turnkeyml\build\__init__.py                 0      0      0      0   100%
 \turnkeyml\build\onnx_helpers.py            70     34     28      2    45%   15-21, 28-87, 92, 95-100
-\turnkeyml\build\quantization_helpers.py    29     20     18      0    19%   13-30, 35, 50-78
 \turnkeyml\build\sequences.py               15      1      8      2    87%   62->61, 65
 \turnkeyml\build\tensor_helpers.py          47     26     34      4    41%   17-44, 57, 61, 63-74, 78
 \turnkeyml\build_api.py                     31      9      8      3    64%   68-71, 120-125, 140-147
diff --git a/src/turnkeyml/build/export.py b/src/turnkeyml/build/export.py
index 4a9f157..2f4bc88 100644
--- a/src/turnkeyml/build/export.py
+++ b/src/turnkeyml/build/export.py
@@ -15,7 +15,6 @@
 import turnkeyml.common.build as build
 import turnkeyml.build.tensor_helpers as tensor_helpers
 import turnkeyml.build.onnx_helpers as onnx_helpers
-import turnkeyml.build.quantization_helpers as quant_helpers
 import turnkeyml.common.filesystem as fs
 
 
@@ -77,13 +76,6 @@ def converted_onnx_file(state: build.State):
     )
 
 
-def quantized_onnx_file(state: build.State):
-    return os.path.join(
-        onnx_dir(state),
-        f"{state.config.build_name}-op{state.config.onnx_opset}-opt-quantized_int8.onnx",
-    )
-
-
 class ExportPlaceholder(stage.Stage):
     """
     Placeholder Stage that should be replaced by a framework-specific export stage,
@@ -571,9 +563,8 @@ def fire(self, state: build.State):
         inputs_file = state.original_inputs_file
         if os.path.isfile(inputs_file):
             inputs = np.load(inputs_file, allow_pickle=True)
-            to_downcast = False if state.quantization_samples else True
             inputs_converted = tensor_helpers.save_inputs(
-                inputs, inputs_file, downcast=to_downcast
+                inputs, inputs_file, downcast=True
             )
         else:
             raise exp.StageError(
@@ -621,58 +612,6 @@ def fire(self, state: build.State):
         return state
 
 
-class QuantizeONNXModel(stage.Stage):
-    """
-    Stage that takes an ONNX model and a dataset of quantization samples as inputs,
-    and performs static post-training quantization to the model to int8 precision.
-
-    Expected inputs:
-     - state.model is a path to the ONNX model
-     - state.quantization_dataset is a dataset that is used for static quantization
-
-    Outputs:
-     - A *_quantized.onnx file => the quantized onnx model.
-    """
-
-    def __init__(self):
-        super().__init__(
-            unique_name="quantize_onnx",
-            monitor_message="Quantizing ONNX model",
-        )
-
-    def fire(self, state: build.State):
-        input_path = state.intermediate_results[0]
-        output_path = quantized_onnx_file(state)
-
-        quant_helpers.quantize(
-            input_file=input_path,
-            data=state.quantization_samples,
-            output_file=output_path,
-        )
-
-        # Check that the converted model is still valid
-        success_msg = "\tSuccess quantizing ONNX model to int8"
-        fail_msg = "\tFailed quantizing ONNX model to int8"
-
-        if check_model(output_path, success_msg, fail_msg):
-            state.intermediate_results = [output_path]
-
-            stats = fs.Stats(state.cache_dir, state.config.build_name, state.stats_id)
-            stats.add_build_stat(
-                fs.Keys.ONNX_FILE,
-                output_path,
-            )
-        else:
-            msg = f"""
-            Attempted to use {state.quantization_dataset} to statically quantize
-            model to int8 datatype, however this operation was not successful.
-            More information may be available in the log file at **{self.logfile_path}**
-            """
-            raise exp.StageError(msg)
-
-        return state
-
-
 class SuccessStage(stage.Stage):
     """
     Stage that sets state.build_status = build.Status.SUCCESSFUL_BUILD,
diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py
index 18da5aa..abd39ba 100644
--- a/src/turnkeyml/build/ignition.py
+++ b/src/turnkeyml/build/ignition.py
@@ -258,7 +258,6 @@ def load_or_make_state(
     monitor: bool,
     model: build.UnionValidModelInstanceTypes = None,
     inputs: Optional[Dict[str, Any]] = None,
-    quantization_samples: Optional[Collection] = None,
     state_type: Type = build.State,
     cache_validation_func: Callable = validate_cached_model,
     extra_state_args: Optional[Dict] = None,
@@ -280,7 +279,6 @@ def load_or_make_state(
         "cache_dir": cache_dir,
         "config": config,
         "model_type": model_type,
-        "quantization_samples": quantization_samples,
     }
 
     # Ensure that `rebuild` has a valid value
@@ -306,50 +304,6 @@ def load_or_make_state(
                     state_type=state_type,
                 )
 
-                # if the previous build is using quantization while the current is not
-                # or vice versa
-                if state.quantization_samples and quantization_samples is None:
-                    if rebuild == "never":
-                        msg = (
-                            f"Model {config.build_name} was built in a previous call to "
-                            "build_model() with post-training quantization sample enabled."
-                            "However, post-training quantization is not enabled in the "
-                            "current build. Rebuild is necessary but currently the rebuild"
-                            "policy is set to 'never'. "
-                        )
-                        raise exp.CacheError(msg)
-
-                    msg = (
-                        f"Model {config.build_name} was built in a previous call to "
-                        "build_model() with post-training quantization sample enabled."
-                        "However, post-training quantization is not enabled in the "
-                        "current build. Starting a fresh build."
-                    )
-
-                    printing.log_info(msg)
-                    return _begin_fresh_build(state_args, state_type)
-
-                if not state.quantization_samples and quantization_samples is not None:
-                    if rebuild == "never":
-                        msg = (
-                            f"Model {config.build_name} was built in a previous call to "
-                            "build_model() with post-training quantization sample disabled."
-                            "However, post-training quantization is enabled in the "
-                            "current build. Rebuild is necessary but currently the rebuild"
-                            "policy is set to 'never'. "
-                        )
-                        raise exp.CacheError(msg)
-
-                    msg = (
-                        f"Model {config.build_name} was built in a previous call to "
-                        "build_model() with post-training quantization sample disabled."
-                        "However, post-training quantization is enabled in the "
-                        "current build. Starting a fresh build."
-                    )
-
-                    printing.log_info(msg)
-                    return _begin_fresh_build(state_args, state_type)
-
             except exp.StateError as e:
                 problem = (
                     "- build_model() failed to load "
@@ -500,7 +454,6 @@ def model_intake(
     user_model,
     user_inputs,
     user_sequence: Optional[stage.Sequence],
-    user_quantization_samples: Optional[Collection] = None,
 ) -> Tuple[Any, Any, stage.Sequence, build.ModelType, str]:
     # Model intake structure options:
     # user_model
@@ -550,18 +503,11 @@ def model_intake(
 
         sequence = copy.deepcopy(user_sequence)
         if sequence is None:
-            if user_quantization_samples:
-                if model_type != build.ModelType.PYTORCH:
-                    raise exp.IntakeError(
-                        "Currently, post training quantization only supports Pytorch models."
-                    )
-                sequence = sequences.pytorch_with_quantization
-            else:
-                sequence = stage.Sequence(
-                    "top_level_sequence",
-                    "Top Level Sequence",
-                    [sequences.onnx_fp32],
-                )
+            sequence = stage.Sequence(
+                "top_level_sequence",
+                "Top Level Sequence",
+                [sequences.onnx_fp32],
+            )
 
         # If there is an ExportPlaceholder Stage in the sequence, replace it with
         # a framework-specific export Stage.
diff --git a/src/turnkeyml/build/quantization_helpers.py b/src/turnkeyml/build/quantization_helpers.py
deleted file mode 100644
index e945bb2..0000000
--- a/src/turnkeyml/build/quantization_helpers.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import os
-import numpy as np
-
-import onnx
-import onnxruntime
-from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType
-
-
-class DataReader(CalibrationDataReader):
-    """Wrapper class around calibration data, which is used to quantize an onnx model."""
-
-    def __init__(self, input_file, samples, input_shapes=None, pack_inputs=False):
-        session = onnxruntime.InferenceSession(input_file, None)
-        input_names = [inp.name for inp in session.get_inputs()]
-
-        if pack_inputs:
-            expand_each = lambda data: [np.expand_dims(d, axis=0) for d in data]
-            self.enum_data_dicts = iter(
-                [
-                    dict(zip(input_names, expand_each(sample_inputs)))
-                    for sample_inputs in zip(*samples)
-                ]
-            )
-        else:
-            if input_shapes:
-                self.samples = samples.reshape(-1, len(input_shapes), *input_shapes[0])
-            else:
-                self.samples = samples
-
-            self.enum_data_dicts = iter(
-                [dict(zip(input_names, sample)) for sample in self.samples]
-            )
-
-    def get_next(self):
-        return next(self.enum_data_dicts, None)
-
-
-def quantize(
-    input_file,
-    data,
-    input_shapes=None,
-    pack_inputs=False,
-    verbose=False,
-    output_file=None,
-):
-    """
-    Given an onnx file and calibration data on which to quantize,
-    computes and saves quantized onnx model to a local file.
-    """
-    data_reader = DataReader(
-        input_file,
-        samples=data,
-        input_shapes=input_shapes,
-        pack_inputs=pack_inputs,
-    )
-
-    if not output_file:
-        output_file = input_file[:-5] + "_quantized.onnx"
-
-    quantize_static(
-        model_input=input_file,
-        model_output=output_file,
-        calibration_data_reader=data_reader,
-        activation_type=QuantType.QUInt8,
-        weight_type=QuantType.QInt8,
-        op_types_to_quantize=["Conv", "MatMul", "Relu"],
-        extra_options={"ActivationSymmetric": False, "WeightSymmetric": True},
-    )
-
-    onnx.save(onnx.shape_inference.infer_shapes(onnx.load(output_file)), output_file)
-
-    if os.path.isfile("augmented_model.onnx"):
-        os.remove("augmented_model.onnx")
-
-    if verbose:
-        print("Calibrated and quantized model saved.")
-
-    return output_file
diff --git a/src/turnkeyml/build/sequences.py b/src/turnkeyml/build/sequences.py
index bc39a0d..7e90ead 100644
--- a/src/turnkeyml/build/sequences.py
+++ b/src/turnkeyml/build/sequences.py
@@ -35,18 +35,6 @@
     enable_model_validation=True,
 )
 
-pytorch_with_quantization = stage.Sequence(
-    "pytorch_export_sequence_with_quantization",
-    "Exporting PyTorch Model and Quantizing Exported ONNX",
-    [
-        export.ExportPytorchModel(),
-        export.OptimizeOnnxModel(),
-        export.QuantizeONNXModel(),
-        export.SuccessStage(),
-    ],
-    enable_model_validation=True,
-)
-
 # Plugin interface for sequences
 discovered_plugins = plugins.discover()
 
diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py
index 5cf681b..44c805c 100644
--- a/src/turnkeyml/build_api.py
+++ b/src/turnkeyml/build_api.py
@@ -17,7 +17,6 @@ def build_model(
     monitor: Optional[bool] = None,
     rebuild: Optional[str] = None,
     sequence: Optional[List[stage.Stage]] = None,
-    quantization_samples: Collection = None,
     onnx_opset: Optional[int] = None,
     device: Optional[str] = None,
 ) -> build.State:
@@ -48,11 +47,6 @@ def build_model(
             - None: Falls back to default
         sequence: Override the default sequence of build stages. Power
             users only.
-        quantization_samples: If set, performs post-training quantization
-            on the ONNX model using the provided samplesIf the previous build used samples
-            that are different to the samples used in current build, the "rebuild"
-            argument needs to be manually set to "always" in the current build
-            in order to create a new ONNX file.
         onnx_opset: ONNX opset to use during ONNX export.
         device: Specific device target to take into account during the build sequence.
             Use the format "device_family", "device_family::part", or
@@ -96,7 +90,6 @@ def build_model(
         model,
         inputs,
         sequence,
-        user_quantization_samples=quantization_samples,
     )
 
     # Get the state of the model from the cache if a valid build is available
@@ -109,7 +102,6 @@ def build_model(
         monitor=monitor_setting,
         model=model_locked,
         inputs=inputs_locked,
-        quantization_samples=quantization_samples,
     )
 
     # Return a cached build if possible, otherwise prepare the model State for
@@ -124,8 +116,6 @@ def build_model(
 
         return state
 
-    state.quantization_samples = quantization_samples
-
     sequence_locked.show_monitor(config, state.monitor)
     state = sequence_locked.launch(state)
 
diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py
index a224d1a..b55ed8b 100644
--- a/src/turnkeyml/common/build.py
+++ b/src/turnkeyml/common/build.py
@@ -259,8 +259,6 @@ class State:
     # Results of a successful build
     results: Any = None
 
-    quantization_samples: Optional[Collection] = None
-
     def __post_init__(self):
         if self.uid is None:
             self.uid = unique_id()
@@ -309,16 +307,6 @@ def prepare_state_dict(self) -> Dict:
         state_dict["model_type"] = self.model_type.value
         state_dict["build_status"] = self.build_status.value
 
-        # During actual execution, quantization_samples in the state
-        # stores the actual quantization samples.
-        # However, we do not save quantization samples
-        # Instead, we save a boolean to indicate whether the model
-        # stored has been quantized by some samples.
-        if self.quantization_samples:
-            state_dict["quantization_samples"] = True
-        else:
-            state_dict["quantization_samples"] = False
-
         return state_dict
 
     def save_yaml(self, state_dict: Dict):
@@ -524,7 +512,7 @@ def get_system_info():
     # Get OS Version
     try:
         info_dict["OS Version"] = platform.platform()
-    except Exception as e: # pylint: disable=broad-except
+    except Exception as e:  # pylint: disable=broad-except
         info_dict["Error OS Version"] = str(e)
 
     if os_type == "Windows":
@@ -537,7 +525,7 @@ def get_system_info():
                 .strip()
             )
             info_dict["Processor"] = proc_info
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Processor"] = str(e)
 
         # Get OEM System Information
@@ -549,7 +537,7 @@ def get_system_info():
                 .strip()
             )
             info_dict["OEM System"] = oem_info
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error OEM System"] = str(e)
 
         # Get Physical Memory in GB
@@ -564,7 +552,7 @@ def get_system_info():
             )
             mem_info_gb = round(int(mem_info_bytes) / (1024**3), 2)
             info_dict["Physical Memory"] = f"{mem_info_gb} GB"
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Physical Memory"] = str(e)
 
     elif os_type == "Linux":
@@ -586,7 +574,7 @@ def get_system_info():
                     .strip()
                 )
                 info_dict["OEM System"] = oem_info
-            except Exception as e: # pylint: disable=broad-except
+            except Exception as e:  # pylint: disable=broad-except
                 info_dict["Error OEM System (WSL)"] = str(e)
 
         else:
@@ -602,7 +590,7 @@ def get_system_info():
                     .replace("\n", " ")
                 )
                 info_dict["OEM System"] = oem_info
-            except Exception as e: # pylint: disable=broad-except
+            except Exception as e:  # pylint: disable=broad-except
                 info_dict["Error OEM System"] = str(e)
 
         # Get CPU Information
@@ -612,7 +600,7 @@ def get_system_info():
                 if "Model name:" in line:
                     info_dict["Processor"] = line.split(":")[1].strip()
                     break
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Processor"] = str(e)
 
         # Get Memory Information
@@ -625,7 +613,7 @@ def get_system_info():
             )
             mem_info_gb = round(int(mem_info) / 1024, 2)
             info_dict["Memory Info"] = f"{mem_info_gb} GB"
-        except Exception as e: # pylint: disable=broad-except
+        except Exception as e:  # pylint: disable=broad-except
             info_dict["Error Memory Info"] = str(e)
 
     else:
@@ -635,9 +623,10 @@ def get_system_info():
     try:
         installed_packages = pkg_resources.working_set
         info_dict["Python Packages"] = [
-            f"{i.key}=={i.version}" for i in installed_packages # pylint: disable=not-an-iterable
+            f"{i.key}=={i.version}"
+            for i in installed_packages  # pylint: disable=not-an-iterable
         ]
-    except Exception as e: # pylint: disable=broad-except
+    except Exception as e:  # pylint: disable=broad-except
         info_dict["Error Python Packages"] = str(e)
 
     return info_dict

From 95788c66e173fdc270a840bdfa5aa8828e401613 Mon Sep 17 00:00:00 2001
From: Jeremy Fowers <jeremy.fowers@amd.com>
Date: Mon, 4 Dec 2023 11:33:07 -0500
Subject: [PATCH 2/2] Fix lint: remove Collection imports left over from quantization removal

Signed-off-by: Jeremy Fowers <jeremy.fowers@amd.com>
---
 src/turnkeyml/build/ignition.py | 1 -
 src/turnkeyml/build_api.py      | 1 -
 src/turnkeyml/common/build.py   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/src/turnkeyml/build/ignition.py b/src/turnkeyml/build/ignition.py
index abd39ba..add22ec 100644
--- a/src/turnkeyml/build/ignition.py
+++ b/src/turnkeyml/build/ignition.py
@@ -1,5 +1,4 @@
 from typing import Optional, List, Tuple, Union, Dict, Any, Type, Callable
-from collections.abc import Collection
 import sys
 import os
 import copy
diff --git a/src/turnkeyml/build_api.py b/src/turnkeyml/build_api.py
index 44c805c..7322a15 100644
--- a/src/turnkeyml/build_api.py
+++ b/src/turnkeyml/build_api.py
@@ -1,6 +1,5 @@
 import os
 from typing import Optional, List, Dict, Any
-from collections.abc import Collection
 import turnkeyml.build.ignition as ignition
 import turnkeyml.build.stage as stage
 import turnkeyml.common.printing as printing
diff --git a/src/turnkeyml/common/build.py b/src/turnkeyml/common/build.py
index b55ed8b..9f8ac38 100644
--- a/src/turnkeyml/common/build.py
+++ b/src/turnkeyml/common/build.py
@@ -8,7 +8,6 @@
 import subprocess
 import enum
 from typing import Optional, Any, List, Dict, Union, Type
-from collections.abc import Collection
 import dataclasses
 import hashlib
 import pkg_resources