Add sample code for GenAI model inferencing

Also, update the packaging logic to include the sample code and the required runtime binaries. NOTE: The native (both CPP and CS) binaries are not yet published by the GenAI team; this needs revisiting once those binaries are available for download.
Showing 13 changed files with 528 additions and 21 deletions.
olive/engine/packaging/sample_code/GenAIOnnxModel/cpp/CMakeLists.txt (36 additions, 0 deletions)

```cmake
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

cmake_minimum_required(VERSION 3.10)
project(olive-genai-cpp-sample)
set(CMAKE_CXX_STANDARD 20)

add_executable(olive-genai-cpp-sample code_sample.cpp)
target_include_directories(olive-genai-cpp-sample
    PRIVATE include
    PRIVATE include/onnxruntime-genai
)
target_link_libraries(olive-genai-cpp-sample
    PRIVATE onnxruntime-genai
)
target_link_directories(olive-genai-cpp-sample
    PRIVATE lib
)

if (MSVC)
    # MSVC doesn't report the correct value for __cplusplus without this explicit flag.
    # Ref: https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/
    target_compile_options(olive-genai-cpp-sample PRIVATE "/Zc:__cplusplus")

    # Copy the runtime DLLs next to the built executable so it can run in place.
    add_custom_command(TARGET olive-genai-cpp-sample POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${PROJECT_SOURCE_DIR}/lib/onnxruntime.dll"
            $<TARGET_FILE_DIR:olive-genai-cpp-sample>
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${PROJECT_SOURCE_DIR}/lib/onnxruntime-genai.dll"
            $<TARGET_FILE_DIR:olive-genai-cpp-sample>
        COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${PROJECT_SOURCE_DIR}/lib/onnxruntime_providers_shared.dll"
            $<TARGET_FILE_DIR:olive-genai-cpp-sample>
    )
endif()
```
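The include and lib paths referenced above imply a packaged layout roughly like the following. This is inferred from the CMakeLists.txt and the `#include` directives in code_sample.cpp; the packaging step that assembles it is not shown in this view:

```
GenAIOnnxModel/cpp/
├── CMakeLists.txt
├── code_sample.cpp
├── include/
│   ├── nlohmann/             (json.hpp)
│   └── onnxruntime-genai/    (ort_genai.h and related headers)
└── lib/                      (onnxruntime-genai link library and the DLLs copied post-build)
```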
olive/engine/packaging/sample_code/GenAIOnnxModel/cpp/README.md (21 additions, 0 deletions)

````markdown
# Olive sample code instructions

## Prerequisites
Install the following:
* GCC 11.0 or higher for Linux
* Microsoft Visual Studio 2022 for Windows
* CMake

## Building the sample code
Run the following commands in the sample code's directory.
```
mkdir build
cmake -S . -B build
cmake --build build
```

## Running the built binary
Run the following command in the build directory.
```
./olive-genai-cpp-sample <Model's directory path>
```
````
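Note that with the Visual Studio generator on Windows, CMake places the binary in a per-configuration subdirectory rather than directly under `build`, so the equivalent invocation from the sample's directory would look like this (assuming the default Debug configuration):

```
build\Debug\olive-genai-cpp-sample.exe <Model's directory path>
```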
olive/engine/packaging/sample_code/GenAIOnnxModel/cpp/code_sample.cpp (94 additions, 0 deletions)

```cpp
#include "nlohmann/json.hpp" | ||
#include "ort_genai.h" | ||
|
||
#include <chrono> | ||
#include <filesystem> | ||
#include <fstream> | ||
#include <iostream> | ||
|
||
namespace fs = std::filesystem; | ||
|
||
static void print_usage(int /*argc*/, char **argv) | ||
{ | ||
std::cerr << "usage: " << argv[0] << " model_path" << std::endl; | ||
} | ||
|
||
bool load_search_options(const fs::path& dirpath, std::unique_ptr<OgaGeneratorParams> ¶ms) | ||
{ | ||
const fs::path config_filepath = dirpath / "genai_config.json"; | ||
std::ifstream istrm(config_filepath); | ||
if (!istrm.is_open()) return false; | ||
|
||
const nlohmann::json j = nlohmann::json::parse(istrm); | ||
if (auto k = j.find("search"); k != j.end()) | ||
{ | ||
if (auto it = k->find("diversity_penalty"); it != k->end()) params->SetSearchOption("diversity_penalty", *it); | ||
if (auto it = k->find("do_sample"); it != k->end()) params->SetSearchOptionBool("do_sample", *it); | ||
if (auto it = k->find("early_stopping"); it != k->end()) params->SetSearchOptionBool("early_stopping", *it); | ||
if (auto it = k->find("length_penalty"); it != k->end()) params->SetSearchOption("length_penalty", *it); | ||
if (auto it = k->find("max_length"); it != k->end()) params->SetSearchOption("max_length", *it); | ||
if (auto it = k->find("min_length"); it != k->end()) params->SetSearchOption("min_length", *it); | ||
if (auto it = k->find("no_repeat_ngram_size"); it != k->end()) params->SetSearchOption("no_repeat_ngram_size", *it); | ||
if (auto it = k->find("num_beams"); it != k->end()) params->SetSearchOption("num_beams", *it); | ||
if (auto it = k->find("num_return_sequences"); it != k->end()) params->SetSearchOption("num_return_sequences", *it); | ||
if (auto it = k->find("past_present_share_buffer"); it != k->end()) params->SetSearchOptionBool("past_present_share_buffer", *it); | ||
if (auto it = k->find("repetition_penalty"); it != k->end()) params->SetSearchOption("repetition_penalty", *it); | ||
if (auto it = k->find("temperature"); it != k->end()) params->SetSearchOption("temperature", *it); | ||
if (auto it = k->find("top_k"); it != k->end()) params->SetSearchOption("top_k", *it); | ||
if (auto it = k->find("top_p"); it != k->end()) params->SetSearchOption("top_p", *it); | ||
} | ||
istrm.close(); | ||
return true; | ||
} | ||
|
||
int main(int argc, char **argv) | ||
{ | ||
if (argc != 2) | ||
{ | ||
print_usage(argc, argv); | ||
return -1; | ||
} | ||
|
||
const char *const model_path = argv[1]; | ||
|
||
std::cout << "Loading model ..." << std::endl; | ||
auto model = OgaModel::Create(model_path); | ||
|
||
std::cout << "Creating tokenizer ..." << std::endl; | ||
auto tokenizer = OgaTokenizer::Create(*model); | ||
|
||
std::cout << "Loading genai_config.json ..." << std::endl; | ||
auto params = OgaGeneratorParams::Create(*model); | ||
|
||
std::cout << "Evaluating generator params and search options ..." << std::endl; | ||
load_search_options(model_path, params); | ||
|
||
const char* const prompt = "Who is Albert Einstein?"; | ||
auto sequences = OgaSequences::Create(); | ||
|
||
std::cout << "Encoding prompt ..." << std::endl; | ||
tokenizer->Encode(prompt, *sequences); | ||
params->SetInputSequences(*sequences); | ||
|
||
std::cout << "Generating tokens ..." << std::endl; | ||
auto start = std::chrono::high_resolution_clock::now(); | ||
auto output_sequences = model->Generate(*params); | ||
auto run_time = std::chrono::duration_cast<std::chrono::seconds>(std::chrono::high_resolution_clock::now() - start); | ||
|
||
std::cout << "Decoding generated tokens ..." << std::endl; | ||
auto out_sequences = output_sequences->Get(0); | ||
auto out_string = tokenizer->Decode(out_sequences); | ||
|
||
std::cout << "Prompt: " << std::endl | ||
<< prompt << std::endl << std::endl; | ||
std::cout << "Output: " << std::endl | ||
<< out_string << std::endl << std::endl; | ||
|
||
std::cout << std::setprecision(2) | ||
<< "Tokens: " << out_sequences.size() | ||
<< ", run_time: " << run_time.count() << " seconds" | ||
<< ", Tokens/sec: " << std::setprecision(2) << out_sequences.size() / (double)run_time.count() | ||
<< std::endl; | ||
|
||
return 0; | ||
} |
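For reference, `load_search_options` only reacts to keys under the top-level `"search"` object of genai_config.json; anything absent falls back to onnxruntime-genai's defaults. A hypothetical fragment exercising a few of the recognized options (values are illustrative, not defaults):

```json
{
  "search": {
    "do_sample": true,
    "max_length": 256,
    "temperature": 0.7,
    "top_k": 50,
    "top_p": 0.9
  }
}
```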
olive/engine/packaging/sample_code/GenAIOnnxModel/cs/README.md (7 additions, 0 deletions)

```markdown
# Olive sample code instructions

## Prerequisites
Install Microsoft Visual Studio 2022 for Windows.

## Running the sample code
Load the included Visual Studio solution, build, and run.
```
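The C# sources referenced by that solution are among the changed files not shown in this view. As orientation only, a minimal sketch of what such a sample typically looks like against the Microsoft.ML.OnnxRuntimeGenAI package (API names as in the early 0.x releases; the actual sample in this commit may differ):

```csharp
using System;
using Microsoft.ML.OnnxRuntimeGenAI;

class Program
{
    static void Main(string[] args)
    {
        // Path to the folder containing the ONNX model and genai_config.json.
        var modelPath = args[0];

        using var model = new Model(modelPath);
        using var tokenizer = new Tokenizer(model);

        var prompt = "Who is Albert Einstein?";
        var sequences = tokenizer.Encode(prompt);

        using var generatorParams = new GeneratorParams(model);
        generatorParams.SetSearchOption("max_length", 256);
        generatorParams.SetInputSequences(sequences);

        // Generate all output tokens at once, then decode the first sequence.
        using var outputSequences = model.Generate(generatorParams);
        var outputText = tokenizer.Decode(outputSequences[0]);
        Console.WriteLine(outputText);
    }
}
```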