onnxruntime-genai based metadata and packaging
* Added support for packaging models (and additional files) generated by
  the GenAIModelExporter. Also updated the pass configuration to include
  search parameters that are forwarded to the generated genai_config file
  (a sketch of such a pass entry follows the changed-files summary below).
* Added support for carrying "additional files" from one pass to the next. These
  files will end up in the generated model's output folder and will be
  packaged.
* Two new packaging configuration options:
  ** include_sample_code
  ** include_runtime_packages
shaahji committed Apr 15, 2024
1 parent 2f4da5c commit b8f9a27
Showing 9 changed files with 349 additions and 20 deletions.
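
The pass configuration mentioned in the first bullet looks roughly like the sketch below, a Python-dict rendering of the GenAIModelExporter entry from the template diff further down; the `search` values shown are the ones used in the llama2 example and are forwarded into the generated genai_config file:

```python
# Sketch of a GenAIModelExporter pass entry; the "search" block is copied
# into the genai_config generated alongside the exported model.
genai_exporter_pass = {
    "type": "GenAIModelExporter",
    "config": {
        "precision": "int4",
        "search": {
            "max_length": 2048,
            "min_length": 0,
        },
    },
}
```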
4 changes: 4 additions & 0 deletions docs/source/features/packaging_output_models.md
@@ -138,6 +138,10 @@ If not specified, Olive will not package artifacts.
The version for this data asset. This is `1` by default.
* `description [str]`
The description for this data asset. This is `None` by default.
* `include_sample_code [bool]`:
Whether or not to include sample code in the zip file. Defaults to `True`.
* `include_runtime_packages [bool]`:
Whether or not to include runtime packages (like onnxruntime) in the zip file. Defaults to `True`.

You can add `PackagingConfig` to Engine configurations. e.g.:
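For example, a minimal engine section carrying both new options might look like the sketch below, written here as a Python dict (the same keys go in the JSON workflow config; all values are illustrative):

```python
# Sketch: engine section with Zipfile packaging and the two new options.
engine_config = {
    "packaging_config": [
        {
            "type": "Zipfile",
            "name": "OutputModels",
            "include_sample_code": True,
            "include_runtime_packages": False,
        }
    ],
    "cache_dir": "cache",
    "output_dir": "models",
}
```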

4 changes: 3 additions & 1 deletion examples/llama2/README.md
@@ -114,9 +114,11 @@ For using ONNX runtime GenAI to optimize, follow build and installation instruct

Run the following command to execute the workflow:
```bash
python -m olive.workflows.run --config lamma2_genai.json
python llama2_genai.py [--model_name <>] [--metadata_only]
```

To generate metadata only for a pre-exported ONNX model, use the `--metadata_only` option.

The snippet below shows an example run of the generated llama2 model.
```python
import onnxruntime_genai as og
# ...
```
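A fuller sketch of such a run is shown below; it assumes the onnxruntime-genai 0.x Python API of this period (`og.Model`, `og.Tokenizer`, `og.GeneratorParams`, `model.generate`) and a hypothetical output folder, so treat it as illustrative rather than the exact README snippet:

```python
import onnxruntime_genai as og

# Load the Olive-optimized model (the folder path is an assumption).
model = og.Model("models/meta-llama/Llama-2-7b-hf")
tokenizer = og.Tokenizer(model)

prompt = "What is the lightest element?"
tokens = tokenizer.encode(prompt)

# Search options mirror the ones forwarded into genai_config by the exporter pass.
params = og.GeneratorParams(model)
params.set_search_options(max_length=128)
params.input_ids = tokens

# Generate the full sequence and decode it back to text.
output_tokens = model.generate(params)[0]
print(tokenizer.decode(output_tokens))
```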
60 changes: 60 additions & 0 deletions examples/llama2/llama2_genai.py
@@ -0,0 +1,60 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import argparse
import json

import olive.workflows.run as olive_run
from olive.common.utils import set_tempdir


def get_args(raw_args):
    parser = argparse.ArgumentParser(description="Llama2 optimization using Generative AI")
    parser.add_argument(
        "--model_name",
        type=str,
        default="meta-llama/Llama-2-7b-hf",
        help="Model name, currently only supports llama2 7B/13B",
    )
    parser.add_argument(
        "--metadata_only", action="store_true", required=False, help="Generate metadata only for a pre-exported ONNX model."
    )
    parser.add_argument("--tempdir", type=str, help="Root directory for tempfile directories and files", required=False)

    return parser.parse_args(raw_args)


def main(raw_args=None):
    args = get_args(raw_args)
    model_name = args.model_name

    # set tempdir
    set_tempdir(args.tempdir)

    input_template = "llama2_genai_template.json"
    with open(input_template) as f:
        template_json_str = f.read()

    # update model name
    template_json_str = template_json_str.replace("<model_name_placeholder>", model_name)
    template_json = json.loads(template_json_str)

    # add pass flows
    if args.metadata_only:
        template_json["pass_flows"] = [["conversion", "metadata", "perf_tuning"]]
    else:
        template_json["pass_flows"] = [["exporter", "perf_tuning"]]
    template_json["engine"]["output_dir"] = f"models/{model_name}"

    # dump config
    output_template = "llama2_genai.json"
    with open(output_template, "w") as f:
        json.dump(template_json, f, indent=4)

    olive_run(template_json)  # pylint: disable=not-callable


if __name__ == "__main__":
    main()
examples/llama2/llama2_genai_template.json
@@ -1,9 +1,9 @@
{
"input_model":{
"input_model": {
"type": "PyTorchModel",
"config": {
"hf_config": {
"model_name": "meta-llama/Llama-2-7b-hf",
"model_name": "<model_name_placeholder>",
"model_class": "LlamaForCausalLM",
"task": "text-generation"
}
@@ -25,10 +25,33 @@
}
},
"passes": {
"conversion": {
"type": "OnnxConversion",
"config": {
"target_opset": 16,
"save_as_external_data": true,
"all_tensors_to_one_file": true
}
},
"exporter": {
"type": "GenAIModelExporter",
"config": {
"precision": "int4"
"precision": "int4",
"search": {
"max_length": 2048,
"min_length": 0
}
}
},
"metadata": {
"type": "GenAIModelExporter",
"config": {
"precision": "int4",
"metadata_only": true,
"search": {
"max_length": 2048,
"min_length": 0
}
}
},
"perf_tuning": {
@@ -37,7 +60,7 @@
"user_script": "user_script.py",
"dataloader_func": "dataloader_func_for_merged",
"dataloader_func_kwargs": {
"model_id": "meta-llama/Llama-2-7b-hf",
"model_id": "<model_name_placeholder>",
"past_seq_length": 0,
"seq_length": 8,
"max_seq_length": 2048
@@ -48,11 +71,18 @@
}
},
"engine": {
"packaging_config": [
{
"type": "Zipfile",
"name": "OutputModel",
"include_runtime_packages": false,
"include_sample_code": false
}
],
"log_severity_level": 0,
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_dir": "models/genai"
"output_dir": null
}
}
2 changes: 2 additions & 0 deletions olive/engine/packaging/packaging_config.py
@@ -48,6 +48,8 @@ class PackagingConfig(ConfigBase):
type: PackagingType = PackagingType.Zipfile
name: str = "OutputModels"
config: CommonPackagingConfig = None
include_runtime_packages: bool = True
include_sample_code: bool = True

@validator("config", pre=True, always=True)
def _validate_config(cls, v, values):
11 changes: 6 additions & 5 deletions olive/engine/packaging/packaging_generator.py
@@ -67,13 +67,14 @@ def _package_candidate_models(
logger.info("Packaging output models to %s", packaging_type)

with tempfile.TemporaryDirectory() as temp_dir:

tempdir = Path(temp_dir)

if packaging_type == PackagingType.Zipfile:
cur_path = Path(__file__).parent
_package_sample_code(cur_path, tempdir)
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))
if packaging_config.include_sample_code:
_package_sample_code(Path(__file__).parent, tempdir)

if packaging_config.include_runtime_packages:
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))

for accelerator_spec, pf_footprint in pf_footprints.items():
footprint = footprints[accelerator_spec]
@@ -113,7 +114,7 @@ def _package_candidate_models(
elif packaging_type == PackagingType.AzureMLData:
_upload_to_azureml_data(azureml_client_config, model_dir, model_name, config)

model_rank += 1
model_rank += 1

if packaging_type == PackagingType.Zipfile:
_copy_models_rank(tempdir, model_info_list)
55 changes: 53 additions & 2 deletions olive/passes/olive_pass.py
@@ -4,6 +4,7 @@
# --------------------------------------------------------------------------
import inspect
import logging
import shutil
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, Union, get_args
@@ -12,7 +13,7 @@
from olive.common.user_module_loader import UserModuleLoader
from olive.data.config import DataConfig
from olive.hardware import DEFAULT_CPU_ACCELERATOR, AcceleratorSpec
from olive.model import CompositeModelHandler, DistributedOnnxModelHandler, OliveModelHandler
from olive.model import CompositeModelHandler, DistributedOnnxModelHandler, OliveModelHandler, ONNXModelHandler
from olive.passes.pass_config import (
PassConfigBase,
PassConfigParam,
@@ -189,7 +190,10 @@ def run(
for rank in range(model.num_ranks):
input_ranked_model = model.load_model(rank)
ranked_output_path = Path(output_model_path).with_suffix("") / model.ranked_model_name(rank)
self._run_for_config(input_ranked_model, data_root, config, str(ranked_output_path))
output_ranked_model = self._run_for_config(
input_ranked_model, data_root, config, str(ranked_output_path)
)
Pass._carry_forward_additional_files(input_ranked_model, output_ranked_model)

output_model = DistributedOnnxModelHandler(
model_path=str(Path(output_model_path).with_suffix("")),
@@ -211,14 +215,61 @@
)
components.append(output_model_component)
component_names.append(component_name)
Pass._carry_forward_additional_files(component_model, output_model_component)
output_model = CompositeModelHandler(components, component_names)
else:
output_model = self._run_for_config(model, data_root, config, output_model_path)
Pass._carry_forward_additional_files(model, output_model)

# assumption: the model attributes from passes, if any, are more important than
# the input model attributes, we should not update/extend anymore outside of the pass run
output_model.model_attributes = output_model.model_attributes or model.model_attributes
return output_model

@staticmethod
def _carry_forward_additional_files(input_model: OliveModelHandler, output_model: OliveModelHandler):
# NOTE: Can't use model.model_path because that always gets resolved to a filepath.
# We need the directory path here.
input_model_path = input_model.get_resource("model_path")
if not input_model_path:
return

input_model_path = Path(input_model_path)
if not input_model_path.is_dir():
return

input_model_attributes = input_model.model_attributes or {}
input_model_additional_files = set(input_model_attributes.get("additional_files", []))
if not input_model_additional_files:
return

output_model_path = Path(output_model.get_resource("model_path"))
if not output_model_path.is_dir():
if isinstance(output_model, ONNXModelHandler):
# change the "model_path" resource to the parent directory of the model file
output_model.set_resource("model_path", output_model_path.parent)
output_model.onnx_file_name = output_model_path.name
output_model_path = output_model_path.parent
else:
raise RuntimeError("Expecting the output model to be in a directory but found a file.")

output_model_attributes = output_model.model_attributes or {}
output_model_additional_files = set(output_model_attributes.get("additional_files", []))

for filepath in input_model_additional_files:
input_filepath = Path(filepath)

# Make sure we don't overwrite an existing file in the output's directory.
# The follow up pass could have *potentially* generated a file with the same name.
output_filepath = output_model_path / input_filepath.name
if not output_filepath.exists():
# TODO(team): Use symlinks instead of copying the files.
output_model_additional_files.add(str(output_filepath))
shutil.copy(str(input_filepath), str(output_filepath))

output_model_attributes["additional_files"] = list(output_model_additional_files)
output_model.model_attributes = output_model_attributes

def serialize_config(self, config: Dict[str, Any], check_object: bool = False) -> str:
"""Serialize the configuration."""
return self._config_class(**config).to_json(check_object)
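The `_carry_forward_additional_files` helper above expects each pass to record its extra artifacts under the `additional_files` model attribute; a minimal sketch of that attribute's shape follows (the folder and file names are hypothetical examples of what GenAIModelExporter might emit):

```python
from pathlib import Path

# Hypothetical pass output folder; the carry-forward helper copies each listed
# file into the next pass's output folder unless a file with that name exists.
output_dir = Path("models/llama2-genai")
model_attributes = {
    "additional_files": [
        str(output_dir / "genai_config.json"),
        str(output_dir / "tokenizer.json"),
    ]
}
```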