onnxruntime-genai based metadata and packaging
* Added support for packaging models (and additional files) generated by
  the GenAIModelExporter. Also updated the pass configuration to include
  search parameters that are forwarded to the generated genai_config file
  (a sketch of such a pass entry follows the changed-files summary below).
* Added support for carrying "additional files" from one pass to the next. These
  files will end up in the generated model's output folder and will be
  packaged.
* Two new packaging configuration options:
  ** include_sample_code
  ** include_runtime_packages
shaahji committed Apr 15, 2024
1 parent 2f4da5c commit b8f9a27
Showing 9 changed files with 349 additions and 20 deletions.
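
The pass configuration mentioned in the first bullet looks roughly like the sketch below, a Python-dict rendering of the GenAIModelExporter entry from the template diff further down; the `search` values shown are the ones used in the llama2 example and are forwarded into the generated genai_config file:

```python
# Sketch of a GenAIModelExporter pass entry; the "search" block is copied
# into the genai_config generated alongside the exported model.
genai_exporter_pass = {
    "type": "GenAIModelExporter",
    "config": {
        "precision": "int4",
        "search": {
            "max_length": 2048,
            "min_length": 0,
        },
    },
}
```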
4 changes: 4 additions & 0 deletions docs/source/features/packaging_output_models.md
@@ -138,6 +138,10 @@ If not specified, Olive will not package artifacts.
The version for this data asset. This is `1` by default.
* `description [str]`
The description for this data asset. This is `None` by default.
* `include_sample_code [bool]`:
Whether or not to include sample code in the zip file. Defaults to `True`.
* `include_runtime_packages [bool]`:
Whether or not to include runtime packages (like onnxruntime) in the zip file. Defaults to `True`.

You can add `PackagingConfig` to Engine configurations. e.g.:
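For example, a minimal engine section carrying both new options might look like the sketch below, written here as a Python dict (the same keys go in the JSON workflow config; all values are illustrative):

```python
# Sketch: engine section with Zipfile packaging and the two new options.
engine_config = {
    "packaging_config": [
        {
            "type": "Zipfile",
            "name": "OutputModels",
            "include_sample_code": True,
            "include_runtime_packages": False,
        }
    ],
    "cache_dir": "cache",
    "output_dir": "models",
}
```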

4 changes: 3 additions & 1 deletion examples/llama2/README.md
@@ -114,9 +114,11 @@ For using ONNX runtime GenAI to optimize, follow build and installation instruct

Run the following command to execute the workflow:
```bash
python -m olive.workflows.run --config lamma2_genai.json
python llama2_genai.py [--model_name <>] [--metadata_only]
```

To generate metadata only for a pre-exported ONNX model, use the `--metadata_only` option.

The snippet below shows an example run of the generated llama2 model.
```python
import onnxruntime_genai as og
# ...
```
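A fuller sketch of such a run is shown below; it assumes the onnxruntime-genai 0.x Python API of this period (`og.Model`, `og.Tokenizer`, `og.GeneratorParams`, `model.generate`) and a hypothetical output folder, so treat it as illustrative rather than the exact README snippet:

```python
import onnxruntime_genai as og

# Load the Olive-optimized model (the folder path is an assumption).
model = og.Model("models/meta-llama/Llama-2-7b-hf")
tokenizer = og.Tokenizer(model)

prompt = "What is the lightest element?"
tokens = tokenizer.encode(prompt)

# Search options mirror the ones forwarded into genai_config by the exporter pass.
params = og.GeneratorParams(model)
params.set_search_options(max_length=128)
params.input_ids = tokens

# Generate the full sequence and decode it back to text.
output_tokens = model.generate(params)[0]
print(tokenizer.decode(output_tokens))
```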
60 changes: 60 additions & 0 deletions examples/llama2/llama2_genai.py
@@ -0,0 +1,60 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import argparse
import json

import olive.workflows.run as olive_run
from olive.common.utils import set_tempdir


def get_args(raw_args):
    parser = argparse.ArgumentParser(description="Llama2 optimization using Generative AI")
    parser.add_argument(
        "--model_name",
        type=str,
        default="meta-llama/Llama-2-7b-hf",
        help="Model name, currently only supports llama2 7B/13B",
    )
    parser.add_argument(
        "--metadata_only", action="store_true", required=False, help="Generate metadata only for a pre-exported ONNX model."
    )
    parser.add_argument("--tempdir", type=str, help="Root directory for tempfile directories and files", required=False)

    return parser.parse_args(raw_args)


def main(raw_args=None):
    args = get_args(raw_args)
    model_name = args.model_name

    # set tempdir
    set_tempdir(args.tempdir)

    input_template = "llama2_genai_template.json"
    with open(input_template) as f:
        template_json_str = f.read()

    # update model name
    template_json_str = template_json_str.replace("<model_name_placeholder>", model_name)
    template_json = json.loads(template_json_str)

    # add pass flows
    if args.metadata_only:
        template_json["pass_flows"] = [["conversion", "metadata", "perf_tuning"]]
    else:
        template_json["pass_flows"] = [["exporter", "perf_tuning"]]
    template_json["engine"]["output_dir"] = f"models/{model_name}"

    # dump config
    output_template = "llama2_genai.json"
    with open(output_template, "w") as f:
        json.dump(template_json, f, indent=4)

    olive_run(template_json)  # pylint: disable=not-callable


if __name__ == "__main__":
    main()
examples/llama2/llama2_genai_template.json
@@ -1,9 +1,9 @@
{
"input_model":{
"input_model": {
"type": "PyTorchModel",
"config": {
"hf_config": {
"model_name": "meta-llama/Llama-2-7b-hf",
"model_name": "<model_name_placeholder>",
"model_class": "LlamaForCausalLM",
"task": "text-generation"
}
@@ -25,10 +25,33 @@
}
},
"passes": {
"conversion": {
"type": "OnnxConversion",
"config": {
"target_opset": 16,
"save_as_external_data": true,
"all_tensors_to_one_file": true
}
},
"exporter": {
"type": "GenAIModelExporter",
"config": {
"precision": "int4"
"precision": "int4",
"search": {
"max_length": 2048,
"min_length": 0
}
}
},
"metadata": {
"type": "GenAIModelExporter",
"config": {
"precision": "int4",
"metadata_only": true,
"search": {
"max_length": 2048,
"min_length": 0
}
}
},
"perf_tuning": {
@@ -37,7 +60,7 @@
"user_script": "user_script.py",
"dataloader_func": "dataloader_func_for_merged",
"dataloader_func_kwargs": {
"model_id": "meta-llama/Llama-2-7b-hf",
"model_id": "<model_name_placeholder>",
"past_seq_length": 0,
"seq_length": 8,
"max_seq_length": 2048
@@ -48,11 +71,18 @@
}
},
"engine": {
"packaging_config": [
{
"type": "Zipfile",
"name": "OutputModel",
"include_runtime_packages": false,
"include_sample_code": false
}
],
"log_severity_level": 0,
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_dir": "models/genai"
"output_dir": null
}
}
2 changes: 2 additions & 0 deletions olive/engine/packaging/packaging_config.py
@@ -48,6 +48,8 @@ class PackagingConfig(ConfigBase):
type: PackagingType = PackagingType.Zipfile
name: str = "OutputModels"
config: CommonPackagingConfig = None
include_runtime_packages: bool = True
include_sample_code: bool = True

@validator("config", pre=True, always=True)
def _validate_config(cls, v, values):
11 changes: 6 additions & 5 deletions olive/engine/packaging/packaging_generator.py
@@ -67,13 +67,14 @@ def _package_candidate_models(
logger.info("Packaging output models to %s", packaging_type)

with tempfile.TemporaryDirectory() as temp_dir:

tempdir = Path(temp_dir)

if packaging_type == PackagingType.Zipfile:
cur_path = Path(__file__).parent
_package_sample_code(cur_path, tempdir)
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))
if packaging_config.include_sample_code:
_package_sample_code(Path(__file__).parent, tempdir)

if packaging_config.include_runtime_packages:
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))

for accelerator_spec, pf_footprint in pf_footprints.items():
footprint = footprints[accelerator_spec]
@@ -113,7 +114,7 @@ def _package_candidate_models(
elif packaging_type == PackagingType.AzureMLData:
_upload_to_azureml_data(azureml_client_config, model_dir, model_name, config)

model_rank += 1
model_rank += 1

if packaging_type == PackagingType.Zipfile:
_copy_models_rank(tempdir, model_info_list)
55 changes: 53 additions & 2 deletions olive/passes/olive_pass.py
@@ -4,6 +4,7 @@
# --------------------------------------------------------------------------
import inspect
import logging
import shutil
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, Union, get_args
@@ -12,7 +13,7 @@
from olive.common.user_module_loader import UserModuleLoader
from olive.data.config import DataConfig
from olive.hardware import DEFAULT_CPU_ACCELERATOR, AcceleratorSpec
from olive.model import CompositeModelHandler, DistributedOnnxModelHandler, OliveModelHandler
from olive.model import CompositeModelHandler, DistributedOnnxModelHandler, OliveModelHandler, ONNXModelHandler
from olive.passes.pass_config import (
PassConfigBase,
PassConfigParam,
@@ -189,7 +190,10 @@ def run(
for rank in range(model.num_ranks):
input_ranked_model = model.load_model(rank)
ranked_output_path = Path(output_model_path).with_suffix("") / model.ranked_model_name(rank)
self._run_for_config(input_ranked_model, data_root, config, str(ranked_output_path))
output_ranked_model = self._run_for_config(
input_ranked_model, data_root, config, str(ranked_output_path)
)
Pass._carry_forward_additional_files(input_ranked_model, output_ranked_model)

output_model = DistributedOnnxModelHandler(
model_path=str(Path(output_model_path).with_suffix("")),
@@ -211,14 +215,61 @@
)
components.append(output_model_component)
component_names.append(component_name)
Pass._carry_forward_additional_files(component_model, output_model_component)
output_model = CompositeModelHandler(components, component_names)
else:
output_model = self._run_for_config(model, data_root, config, output_model_path)
Pass._carry_forward_additional_files(model, output_model)

# assumption: the model attributes from passes, if any, are more important than
# the input model attributes, we should not update/extend anymore outside of the pass run
output_model.model_attributes = output_model.model_attributes or model.model_attributes
return output_model

@staticmethod
def _carry_forward_additional_files(input_model: OliveModelHandler, output_model: OliveModelHandler):
# NOTE: Can't use model.model_path because that always gets resolved to a filepath.
# We need the directory path here.
input_model_path = input_model.get_resource("model_path")
if not input_model_path:
return

input_model_path = Path(input_model_path)
if not input_model_path.is_dir():
return

input_model_attributes = input_model.model_attributes or {}
input_model_additional_files = set(input_model_attributes.get("additional_files", []))
if not input_model_additional_files:
return

output_model_path = Path(output_model.get_resource("model_path"))
if not output_model_path.is_dir():
if isinstance(output_model, ONNXModelHandler):
# change the "model_path" resource to the parent directory of the model file
output_model.set_resource("model_path", output_model_path.parent)
output_model.onnx_file_name = output_model_path.name
output_model_path = output_model_path.parent
else:
raise RuntimeError("Expecting the output model to be in a directory but found a file.")

output_model_attributes = output_model.model_attributes or {}
output_model_additional_files = set(output_model_attributes.get("additional_files", []))

for filepath in input_model_additional_files:
input_filepath = Path(filepath)

# Make sure we don't overwrite an existing file in the output's directory.
# The follow up pass could have *potentially* generated a file with the same name.
output_filepath = output_model_path / input_filepath.name
if not output_filepath.exists():
# TODO(team): Use symlinks instead of copying the files.
output_model_additional_files.add(str(output_filepath))
shutil.copy(str(input_filepath), str(output_filepath))

output_model_attributes["additional_files"] = list(output_model_additional_files)
output_model.model_attributes = output_model_attributes

def serialize_config(self, config: Dict[str, Any], check_object: bool = False) -> str:
"""Serialize the configuration."""
return self._config_class(**config).to_json(check_object)
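The `_carry_forward_additional_files` helper above expects each pass to record its extra artifacts under the `additional_files` model attribute; a minimal sketch of that attribute's shape follows (the folder and file names are hypothetical examples of what GenAIModelExporter might emit):

```python
from pathlib import Path

# Hypothetical pass output folder; the carry-forward helper copies each listed
# file into the next pass's output folder unless a file with that name exists.
output_dir = Path("models/llama2-genai")
model_attributes = {
    "additional_files": [
        str(output_dir / "genai_config.json"),
        str(output_dir / "tokenizer.json"),
    ]
}
```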