-
Notifications
You must be signed in to change notification settings - Fork 162
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
onnxruntime-genai based metadata and packaging
* Added support for packaging models (and additional files) generated by the GenAIModelExporter. Also, updated the pass configuration to include search parameters that are forwarded to the generated genai_config file. * Added support for carrying "additional files" from one pass to the next. These files will end up in the generated models output folder and will be packaged. * Two new packaging configuration options - ** include_sample_code ** include_runtime_packages
- Loading branch information
Showing
9 changed files
with
349 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# ------------------------------------------------------------------------- | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
# -------------------------------------------------------------------------- | ||
|
||
import argparse | ||
import json | ||
|
||
import olive.workflows.run as olive_run | ||
from olive.common.utils import set_tempdir | ||
|
||
|
||
def get_args(raw_args):
    """Parse the command-line options of the Llama2 GenAI example.

    Args:
        raw_args: Sequence of argument strings to parse, or ``None`` to let
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``model_name`` (str),
        ``metadata_only`` (bool) and ``tempdir`` (str or None).
    """
    parser = argparse.ArgumentParser(description="Llama2 optimization using Generative AI")
    parser.add_argument(
        "--model_name",
        type=str,
        default="meta-llama/Llama-2-7b-hf",
        help="Model name, currently only supports llama2 7B/13B",
    )
    # The previous help text ("Whether to use gpu for optimization.") described
    # a different option; this flag only selects which pass flow is run.
    parser.add_argument(
        "--metadata_only",
        action="store_true",
        required=False,
        help="Whether to run the conversion + metadata pass flow instead of the exporter pass flow.",
    )
    parser.add_argument("--tempdir", type=str, help="Root directory for tempfile directories and files", required=False)

    return parser.parse_args(raw_args)
|
||
|
||
def main(raw_args=None):
    """Generate the Olive workflow config from the template and run it.

    Reads ``llama2_genai_template.json`` from the current working directory,
    substitutes the model name, selects the pass flow, writes the resulting
    config to ``llama2_genai.json`` and passes the config dict to ``olive_run``.

    Args:
        raw_args: Optional sequence of command-line argument strings; ``None``
            lets argparse read ``sys.argv[1:]``.
    """
    args = get_args(raw_args)
    model_name = args.model_name

    # set tempdir
    set_tempdir(args.tempdir)

    # Read the template with an explicit encoding so the result does not
    # depend on the platform's default locale encoding.
    input_template = "llama2_genai_template.json"
    with open(input_template, encoding="utf-8") as f:
        template_json_str = f.read()

    # update model name (textual substitution happens before JSON parsing)
    template_json_str = template_json_str.replace("<model_name_placeholder>", model_name)
    template_json = json.loads(template_json_str)

    # add pass flows
    if args.metadata_only:
        template_json["pass_flows"] = [["conversion", "metadata", "perf_tuning"]]
    else:
        template_json["pass_flows"] = [["exporter", "perf_tuning"]]
    template_json["engine"]["output_dir"] = f"models/{model_name}"

    # dump config so the exact configuration that was run can be inspected
    output_template = "llama2_genai.json"
    with open(output_template, "w", encoding="utf-8") as f:
        json.dump(template_json, f, indent=4)

    olive_run(template_json)  # pylint: disable=not-callable
|
||
|
||
# Script entry point: run the workflow only when executed directly, not on import.
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.