Skip to content

Commit

Permalink
Packaging for OrtGenAI exporter models
Browse files Browse the repository at this point in the history
Add support for packaging models (and additional files) generated by
the GenAIModelExporter. Update the pass configuration to include search
parameters that are forwarded to the genai_config file.

Also, add two additional parameters to the packaging config:
+ include_sample_code
+ include_runtime_packages
  • Loading branch information
shaahji committed Apr 11, 2024
1 parent fc189f9 commit 2d3728a
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 8 deletions.
4 changes: 4 additions & 0 deletions docs/source/features/packaging_output_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ If not specified, Olive will not package artifacts.
The version for this data asset. This is `1` by default.
* `description [str]`
The description for this data asset. This is `None` by default.
* `include_sample_code [bool]`:
The whether to include the sample code in the zip file. This is `True` by default.
* `include_runtime_packages [bool]`:
The whether to include the runtime packages (such as onnxruntime) in the zip file. This is `True` by default.

You can add `PackagingConfig` to Engine configurations. e.g.:

Expand Down
17 changes: 14 additions & 3 deletions examples/llama2/llama2_genai.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"input_model":{
"input_model": {
"type": "PyTorchModel",
"config": {
"hf_config": {
Expand Down Expand Up @@ -28,7 +28,11 @@
"exporter": {
"type": "GenAIModelExporter",
"config": {
"precision": "int4"
"precision": "int4",
"search": {
"max_length": 2048,
"min_length": 0
}
}
},
"perf_tuning": {
Expand All @@ -48,8 +52,15 @@
}
},
"engine": {
"packaging_config": [
{
"type": "Zipfile",
"name": "OutputModels",
"include_runtime_packages": false,
"include_sample_code": false
}
],
"log_severity_level": 0,
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
Expand Down
2 changes: 2 additions & 0 deletions olive/engine/packaging/packaging_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class PackagingConfig(ConfigBase):
type: PackagingType = PackagingType.Zipfile
name: str = "OutputModels"
config: CommonPackagingConfig = None
include_runtime_packages: bool = True
include_sample_code: bool = True

@validator("config", pre=True, always=True)
def _validate_config(cls, v, values):
Expand Down
9 changes: 5 additions & 4 deletions olive/engine/packaging/packaging_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,14 @@ def _package_candidate_models(
logger.info("Packaging output models to %s", packaging_type)

with tempfile.TemporaryDirectory() as temp_dir:

tempdir = Path(temp_dir)

if packaging_type == PackagingType.Zipfile:
cur_path = Path(__file__).parent
_package_sample_code(cur_path, tempdir)
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))
if packaging_config.include_sample_code:
_package_sample_code(Path(__file__).parent, tempdir)

if packaging_config.include_runtime_packages:
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))

for accelerator_spec, pf_footprint in pf_footprints.items():
footprint = footprints[accelerator_spec]
Expand Down
16 changes: 15 additions & 1 deletion olive/passes/onnx/genai_model_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# --------------------------------------------------------------------------
# Export a PyTorch model using the onnxruntime-genai package.
# --------------------------------------------------------------------------
import json
import logging
import os
import tempfile
Expand Down Expand Up @@ -41,7 +42,10 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> Dict[str, PassCon
type_=GenAIModelExporter.Precision,
required=True,
description="Precision of model.",
)
),
"search": PassConfigParam(
type_=Dict[str, Any], required=False, description="Search options to use for generate loop."
),
}

def validate_search_point(
Expand Down Expand Up @@ -110,4 +114,14 @@ def _run_for_config(
filename=str(output_model_filepath.name),
)

# Override default search options with ones from config
genai_config_filepath = str(output_model_filepath.parent / "genai_config.json")
with open(genai_config_filepath) as istrm:
genai_config = json.load(istrm)

genai_config["search"] = {**genai_config.get("search", {}), **config.get("search", {})}

with open(genai_config_filepath, "w") as ostrm:
json.dump(genai_config, ostrm, indent=4)

return ONNXModelHandler(output_model_filepath.parent, onnx_file_name=output_model_filepath.name)

0 comments on commit 2d3728a

Please sign in to comment.