Add sample code for GenAI model inferencing
Also, update the packaging logic to include sample code and
required runtime binaries.

NOTE: Native (both CPP & CS) binaries are not yet published by the
GenAI team; this needs to be revisited once those binaries are
available for download.
shaahji committed May 2, 2024
1 parent d5337d5 commit f799ca4
Showing 13 changed files with 528 additions and 21 deletions.
19 changes: 15 additions & 4 deletions .lintrunner.toml
@@ -41,6 +41,9 @@ include_patterns = [
'**/*.py',
'**/*.pyi'
]
exclude_patterns = [
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
'-m',
@@ -67,6 +70,7 @@ include_patterns = [
'**/*.py'
]
exclude_patterns = [
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
@@ -94,6 +98,7 @@ include_patterns = [
'**/*.py'
]
exclude_patterns = [
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
@@ -122,7 +127,8 @@ include_patterns = [
'**/*.pyi'
]
exclude_patterns = [
'examples/pytorch/*.py'
'examples/pytorch/*.py',
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
@@ -149,7 +155,9 @@ init_command = [
[[linter]]
code = 'NOQA'
include_patterns = ['**/*.py', '**/*.pyi']
exclude_patterns = []
exclude_patterns = [
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
'-m',
@@ -170,7 +178,9 @@ command = [
[[linter]]
code = 'SPACES'
include_patterns = ['**']
exclude_patterns = []
exclude_patterns = [
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
'-m',
@@ -194,7 +204,8 @@ include_patterns = ['**']
exclude_patterns = [
'.lintrunner.toml',
'**/Makefile',
'**/*.bat'
'**/*.bat',
'**/olive/engine/packaging/sample_code'
]
command = [
'python',
4 changes: 1 addition & 3 deletions examples/llama2/llama2_model_builder_template.json
@@ -75,9 +75,7 @@
"packaging_config": [
{
"type": "Zipfile",
"name": "OutputModel",
"include_runtime_packages": false,
"include_sample_code": false
"name": "OutputModel"
}
],
"log_severity_level": 0,
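For reference, the two flags removed above are still part of Olive's Zipfile packaging configuration; a minimal sketch of an explicit entry that opts out of the new packaging behavior (field names are taken from the removed lines; the defaults that apply when the flags are omitted are an assumption based on the commit message):

```
"packaging_config": [
    {
        "type": "Zipfile",
        "name": "OutputModel",
        "include_sample_code": false,
        "include_runtime_packages": false
    }
]
```

With the flags omitted, as in the updated example, the packaged output is expected to include both the sample code and the required runtime binaries.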
69 changes: 55 additions & 14 deletions olive/engine/packaging/packaging_generator.py
@@ -296,11 +296,17 @@ def _package_candidate_models(
tempdir = Path(temp_dir)

if packaging_type == PackagingType.Zipfile:
best_node: FootprintNode = _get_best_candidate_node(pf_footprints, footprints)
model_attributes = best_node.model_config["config"].get("model_attributes") or {}

if packaging_config.include_sample_code:
_package_sample_code(Path(__file__).parent, tempdir)
_package_sample_code(Path(__file__).parent, tempdir, model_attributes.get("is_generative", False))

if packaging_config.include_runtime_packages:
_package_onnxruntime_packages(tempdir, next(iter(pf_footprints.values())))
if model_attributes.get("is_generative", False):
_package_onnxruntime_genai_runtime_dependencies(tempdir)
else:
_package_onnxruntime_runtime_dependencies(tempdir, next(iter(pf_footprints.values())))

for accelerator_spec, pf_footprint in pf_footprints.items():
footprint = footprints[accelerator_spec]
@@ -436,8 +442,9 @@ def _copy_models_rank(tempdir: Path, model_info_list: List[Dict]):
f.write(json.dumps(model_info_list))


def _package_sample_code(cur_path: Path, tempdir: Path):
copy_dir(cur_path / "sample_code", tempdir / "SampleCode")
def _package_sample_code(cur_path: Path, tempdir: Path, is_generative: bool):
subdir_name = "GenAIOnnxModel" if is_generative else "ONNXModel"
copy_dir(cur_path / "sample_code" / subdir_name, tempdir / "SampleCode")


def _package_zipfile_model(output_dir: Path, output_name: str, model_dir: Path):
@@ -565,7 +572,42 @@ def _generate_onnx_mlflow_model(model_dir: Path, inference_config: Dict):
return mlflow_model_path


def _package_onnxruntime_packages(tempdir: Path, pf_footprint: "Footprint"):
def _package_onnxruntime_genai_runtime_dependencies(tempdir: Path):
# pylint: disable=not-an-iterable
installed_packages = [
pkg
for pkg in pkg_resources.working_set
if pkg.key.startswith("onnxruntime-genai") or pkg.project_name.startswith("onnxruntime-genai")
]
if not installed_packages:
logger.warning("ONNXRuntime-GenAI package is not installed. Skip packaging runtime packages.")
return

DOWNLOAD_COMMAND_TEMPLATE = Template(
f"{sys.executable} -m pip download $package_name==$version --no-deps -d $python_download_path"
)
python_download_path = tempdir / "ONNXRuntimePackages" / "python"
python_download_path.mkdir(parents=True, exist_ok=True)
python_download_path = str(python_download_path)

for pkg in installed_packages:
pkg_name = pkg.key if pkg.key.startswith("onnxruntime-genai") else pkg.project_name
download_command = DOWNLOAD_COMMAND_TEMPLATE.substitute(
package_name=pkg_name, version=pkg.version, python_download_path=python_download_path
)

try:
run_subprocess(download_command)
except Exception:
logger.exception(
"Failed to download %s package. Manually download & install the required package.", pkg_name
)

# Download CPP && CS onnxruntime-genai packages
# TODO(olive-devteam): As of this writing the native packages aren't published.


def _package_onnxruntime_runtime_dependencies(tempdir: Path, pf_footprint: "Footprint"):
# pylint: disable=not-an-iterable
installed_packages = pkg_resources.working_set
onnxruntime_pkg = [i for i in installed_packages if i.key.startswith("onnxruntime")]
@@ -581,19 +623,18 @@ def _package_onnxruntime_packages(tempdir: Path, pf_footprint: "Footprint"):
logger.warning("Both ONNXRuntime and ort-nightly packages are installed. Package ort-nightly package only.")

ort_version = ort_nightly_pkg[0].version if is_nightly else onnxruntime_pkg[0].version
package_name_list = set()
use_ort_extensions = False

for model_id in pf_footprint.nodes:
if pf_footprint.get_use_ort_extensions(model_id):
use_ort_extensions = True

inference_settings = pf_footprint.get_model_inference_config(model_id)
package_name_list = []
if not inference_settings:
package_name_list.append(("onnxruntime", "ort-nightly"))
else:
if inference_settings:
ep_list = inference_settings["execution_provider"]
package_name_list.extend([get_package_name_from_ep(ep[0]) for ep in ep_list])
package_name_list = set(package_name_list)
package_name_list.update([get_package_name_from_ep(ep[0]) for ep in ep_list])
else:
package_name_list.update(["onnxruntime", "ort-nightly"])

try:
# Download Python onnxruntime package
@@ -637,7 +678,7 @@ def _package_onnxruntime_packages(tempdir: Path, pf_footprint: "Footprint"):
if is_nightly:
_skip_download_c_package(ort_download_path)
else:
_download_c_packages(package_name_list, ort_version, ort_download_path)
_download_native_onnx_packages(package_name_list, ort_version, ort_download_path)

except Exception:
logger.exception("Failed to download onnxruntime package. Please manually download onnxruntime package.")
@@ -675,7 +716,7 @@ def _download_ort_extensions_package(use_ort_extensions: bool, download_path: str
run_subprocess(download_command)


def _download_c_packages(package_name_list: List[str], ort_version: str, ort_download_path: str):
def _download_native_onnx_packages(package_name_list: List[str], ort_version: str, ort_download_path: str):
PACKAGE_DOWNLOAD_LINK_MAPPING = {
"onnxruntime": Template("https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/$ort_version"),
"onnxruntime-gpu": Template("https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime.Gpu/$ort_version"),
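For context, DOWNLOAD_COMMAND_TEMPLATE in _package_onnxruntime_genai_runtime_dependencies expands to an ordinary pip download invocation per installed onnxruntime-genai package; an illustrative expansion (the version and destination directory are placeholders, and python stands in for sys.executable):

```
python -m pip download onnxruntime-genai==<installed version> --no-deps -d <tempdir>/ONNXRuntimePackages/python
```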
@@ -0,0 +1,36 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

cmake_minimum_required(VERSION 3.10)
project(olive-genai-cpp-sample)
set(CMAKE_CXX_STANDARD 20)

add_executable(olive-genai-cpp-sample code_sample.cpp)
target_include_directories(olive-genai-cpp-sample
PRIVATE include
PRIVATE include/onnxruntime-genai
)
target_link_libraries(olive-genai-cpp-sample
PRIVATE onnxruntime-genai
)
target_link_directories(olive-genai-cpp-sample
PRIVATE lib
)

if (MSVC)
# MSVC doesn't report correct value for __cplusplus without the explicit flag
# Ref: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/
target_compile_options(olive-genai-cpp-sample PRIVATE "/Zc:__cplusplus")

add_custom_command(TARGET olive-genai-cpp-sample POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${PROJECT_SOURCE_DIR}/lib/onnxruntime.dll"
$<TARGET_FILE_DIR:olive-genai-cpp-sample>
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${PROJECT_SOURCE_DIR}/lib/onnxruntime-genai.dll"
$<TARGET_FILE_DIR:olive-genai-cpp-sample>
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${PROJECT_SOURCE_DIR}/lib/onnxruntime_providers_shared.dll"
$<TARGET_FILE_DIR:olive-genai-cpp-sample>
)
endif()
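The CMake project above assumes the GenAI headers and runtime libraries have been staged next to the sample sources; a sketch of the layout it expects, inferred from the include/link directories and the post-build copy commands (file names are Windows-flavored, and the native binaries themselves are not shipped by this commit, per the NOTE in the commit message):

```
cpp/
├── CMakeLists.txt
├── code_sample.cpp
├── include/
│   ├── nlohmann/json.hpp
│   └── onnxruntime-genai/      (ort_genai.h and related headers)
└── lib/
    ├── onnxruntime.dll
    ├── onnxruntime-genai.dll
    └── onnxruntime_providers_shared.dll
```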
21 changes: 21 additions & 0 deletions olive/engine/packaging/sample_code/GenAIOnnxModel/cpp/README.md
@@ -0,0 +1,21 @@
# Olive sample code instructions

## Prerequisites
Install the following:
* GCC 11.0 or higher for Linux
* Microsoft Visual Studio 2022 for Windows
* CMake

## Building sample code
Run the following commands in the sample code's directory.
```
mkdir build
cmake -S . -B build
cmake --build build
```

## Running the built binary
Run the following command in the build directory.
```
./olive-genai-cpp-sample <Model's directory path>
```
@@ -0,0 +1,94 @@
#include "nlohmann/json.hpp"
#include "ort_genai.h"

#include <chrono>
#include <filesystem>
#include <fstream>
#include <iostream>

namespace fs = std::filesystem;

static void print_usage(int /*argc*/, char **argv)
{
std::cerr << "usage: " << argv[0] << " model_path" << std::endl;
}

bool load_search_options(const fs::path& dirpath, std::unique_ptr<OgaGeneratorParams> &params)
{
const fs::path config_filepath = dirpath / "genai_config.json";
std::ifstream istrm(config_filepath);
if (!istrm.is_open()) return false;

const nlohmann::json j = nlohmann::json::parse(istrm);
if (auto k = j.find("search"); k != j.end())
{
if (auto it = k->find("diversity_penalty"); it != k->end()) params->SetSearchOption("diversity_penalty", *it);
if (auto it = k->find("do_sample"); it != k->end()) params->SetSearchOptionBool("do_sample", *it);
if (auto it = k->find("early_stopping"); it != k->end()) params->SetSearchOptionBool("early_stopping", *it);
if (auto it = k->find("length_penalty"); it != k->end()) params->SetSearchOption("length_penalty", *it);
if (auto it = k->find("max_length"); it != k->end()) params->SetSearchOption("max_length", *it);
if (auto it = k->find("min_length"); it != k->end()) params->SetSearchOption("min_length", *it);
if (auto it = k->find("no_repeat_ngram_size"); it != k->end()) params->SetSearchOption("no_repeat_ngram_size", *it);
if (auto it = k->find("num_beams"); it != k->end()) params->SetSearchOption("num_beams", *it);
if (auto it = k->find("num_return_sequences"); it != k->end()) params->SetSearchOption("num_return_sequences", *it);
if (auto it = k->find("past_present_share_buffer"); it != k->end()) params->SetSearchOptionBool("past_present_share_buffer", *it);
if (auto it = k->find("repetition_penalty"); it != k->end()) params->SetSearchOption("repetition_penalty", *it);
if (auto it = k->find("temperature"); it != k->end()) params->SetSearchOption("temperature", *it);
if (auto it = k->find("top_k"); it != k->end()) params->SetSearchOption("top_k", *it);
if (auto it = k->find("top_p"); it != k->end()) params->SetSearchOption("top_p", *it);
}
istrm.close();
return true;
}

int main(int argc, char **argv)
{
if (argc != 2)
{
print_usage(argc, argv);
return -1;
}

const char *const model_path = argv[1];

std::cout << "Loading model ..." << std::endl;
auto model = OgaModel::Create(model_path);

std::cout << "Creating tokenizer ..." << std::endl;
auto tokenizer = OgaTokenizer::Create(*model);

std::cout << "Loading genai_config.json ..." << std::endl;
auto params = OgaGeneratorParams::Create(*model);

std::cout << "Evaluating generator params and search options ..." << std::endl;
load_search_options(model_path, params);

const char* const prompt = "Who is Albert Einstein?";
auto sequences = OgaSequences::Create();

std::cout << "Encoding prompt ..." << std::endl;
tokenizer->Encode(prompt, *sequences);
params->SetInputSequences(*sequences);

std::cout << "Generating tokens ..." << std::endl;
auto start = std::chrono::high_resolution_clock::now();
auto output_sequences = model->Generate(*params);
auto run_time = std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now() - start);

std::cout << "Decoding generated tokens ..." << std::endl;
auto out_sequences = output_sequences->Get(0);
auto out_string = tokenizer->Decode(out_sequences);

std::cout << "Prompt: " << std::endl
<< prompt << std::endl << std::endl;
std::cout << "Output: " << std::endl
<< out_string << std::endl << std::endl;

std::cout << std::setprecision(2)
<< "Tokens: " << out_sequences.size()
<< ", run_time: " << run_time.count() << " seconds"
<< ", Tokens/sec: " << std::setprecision(2) << out_sequences.size() / (double)run_time.count()
<< std::endl;

return 0;
}
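For reference, load_search_options above reads the optional "search" block of the model directory's genai_config.json; a hedged illustration with a few of the recognized keys (the values are placeholders; in practice the file is generated alongside the exported model rather than written by hand):

```
{
    "search": {
        "do_sample": false,
        "max_length": 2048,
        "num_beams": 1,
        "repetition_penalty": 1.0,
        "temperature": 1.0,
        "top_k": 50,
        "top_p": 1.0
    }
}
```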
@@ -0,0 +1,7 @@
# Olive sample code instructions

## Prerequisites
Install Microsoft Visual Studio 2022 for Windows

## Running the sample code
Load the included Visual Studio solution, build, and run.
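Only C++ and C# samples are added in this commit, but the Python onnxruntime-genai wheel packaged by _package_onnxruntime_genai_runtime_dependencies supports the same flow; a rough Python sketch of the C++ sample above, assuming the onnxruntime-genai Python API (class and method names may vary between releases):

```
# Rough Python equivalent of code_sample.cpp; assumes the onnxruntime-genai
# wheel from the packaged ONNXRuntimePackages/python directory is installed.
import onnxruntime_genai as og

model_path = "<model directory>"             # placeholder: folder containing genai_config.json
model = og.Model(model_path)                 # load the exported GenAI ONNX model
tokenizer = og.Tokenizer(model)

params = og.GeneratorParams(model)
params.set_search_options(max_length=256)    # illustrative; mirror genai_config.json as needed

prompt = "Who is Albert Einstein?"
params.input_ids = tokenizer.encode(prompt)

output_tokens = model.generate(params)       # one generated sequence per input sequence
print(tokenizer.decode(output_tokens[0]))
```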