[BFCL] add ibm-granite-20b-functioncalling model #525

Merged · 1 commit · Jul 17, 2024
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/README.md
@@ -194,6 +194,7 @@ Below is *a table of models we support* to run our leaderboard evaluation against
|snowflake/arctic | Prompt|
|nvidia/nemotron-4-340b-instruct| Prompt|
|THUDM/glm-4-9b-chat 💻| Function Calling|
|ibm-granite/granite-20b-functioncalling 💻| Function Calling|

Here {MODEL} 💻 means the model needs to be hosted locally and served via vLLM, while plain {MODEL} means the model is invoked through API calls. A trailing `-FC` indicates that the model supports the function-calling feature. You can check out the table summarizing feature support across different models [here](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#prompt).

@@ -371,14 +371,20 @@
"Nemotron-4-340b-instruct (Prompt)",
"https://huggingface.co/nvidia/nemotron-4-340b-instruct",
"NVIDIA",
"nvidia-open-model-license"
"nvidia-open-model-license",
],
"ibm-granite/granite-20b-functioncalling": [
"Granite-20b-FunctionCalling (FC)",
"https://huggingface.co/ibm-granite/granite-20b-functioncalling",
"IBM",
"Apache-2.0",
],
"THUDM/glm-4-9b-chat": [
"GLM-4-9b-Chat (FC)",
"https://huggingface.co/THUDM/glm-4-9b-chat",
"THUDM",
"glm-4"
]
"glm-4",
],
}

INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -473,7 +479,7 @@
"meta-llama/Meta-Llama-3-8B-Instruct": 73,
"meta-llama/Meta-Llama-3-70B-Instruct": 307,
"gorilla-openfunctions-v2": 83,
"THUDM/glm-4-9b-chat": 223
"THUDM/glm-4-9b-chat": 223,
}


@@ -486,6 +492,7 @@
"meetkai/functionary-small-v2.4-FC",
"snowflake/arctic",
"nvidia/nemotron-4-340b-instruct",
"ibm-granite/granite-20b-functioncalling",
"THUDM/glm-4-9b-chat",
]

@@ -673,7 +680,7 @@ def display_api_status_error(rest_error, executable_error, display_success=False

    RED_FONT = "\033[91m"
    RESET = "\033[0m"

    print(f"\n{RED_FONT}{'-' * 18} Executable Categories' Error Bounds Based on API Health Status {'-' * 18}{RESET}\n")

    if rest_error:
@@ -682,7 +689,7 @@ def display_api_status_error(rest_error, executable_error, display_success=False
        for data, status in rest_error.errors:
            print(f" - Test Case: {data['ground_truth']}")
            print(f" Error Type: {status['error_type']}\n")

    if executable_error:
        print(f"❗️ Warning: Unable to verify health of executable APIs used in executable test categories (Non-REST). Please contact API provider.\n")
        print(f"{executable_error.error_rate} APIs affected:\n")
@@ -691,8 +698,8 @@ def display_api_status_error(rest_error, executable_error, display_success=False
print(f" Error Type: {status['error_type']}\n")

print(f"{RED_FONT}{'-' * 100}\n{RESET}")


def get_executable_expected_output(prompt_file_path):
# Before we run the evaluation, we need to add the "execution_result" field to the prompt file, using the ground truth data.
prompt_content = load_file(prompt_file_path)
3 changes: 2 additions & 1 deletion berkeley-function-call-leaderboard/model_handler/constant.py
@@ -143,7 +143,8 @@
"NousResearch/Hermes-2-Pro-Mistral-7B",
"command-r-plus-FC",
"command-r-plus-FC-optimized",
"THUDM/glm-4-9b-chat"
"THUDM/glm-4-9b-chat",
"ibm-granite/granite-20b-functioncalling",
]

TEST_CATEGORIES = {
113 changes: 113 additions & 0 deletions berkeley-function-call-leaderboard/model_handler/granite_handler.py
@@ -0,0 +1,113 @@
import json

from model_handler.model_style import ModelStyle
from model_handler.oss_handler import OSSHandler
from model_handler.constant import GORILLA_TO_OPENAPI
from model_handler.utils import (
    language_specific_pre_processing,
    convert_to_tool,
    augment_prompt_by_languge,
)


class GraniteHandler(OSSHandler):
    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
        # Decode near-greedily regardless of the requested temperature.
        temperature = 0.001
        super().__init__(model_name, temperature, top_p, max_tokens)

    # Used as a plain function (no `self`); it is the default `format_prompt_func`
    # passed to `OSSHandler.inference` below.
    def _format_prompt(prompt, function, test_category):
        prompt_str = (
            "SYSTEM: You are a helpful assistant with access to the following function calls. "
            "Your task is to produce a sequence of function calls necessary to generate response to the user utterance. "
            "Use the following function calls as required."
            "\n<|function_call_library|>\n{functions_str}\n"
            'If none of the functions are relevant or the given question lacks the parameters required by the function, please output "<function_call> {"name": "no_function", "arguments": {}}".\n\n'
            "USER: {query}\nASSISTANT: "
        )

        # Remove the language-specific prompt augmentation string, such as
        # "Note that the provided function is in Python".
        language_specific_prompt_augmented_str = augment_prompt_by_languge(
            "", test_category
        )
        if language_specific_prompt_augmented_str.strip():
            prompt = prompt.replace(language_specific_prompt_augmented_str, "")

        functions = language_specific_pre_processing(function, test_category, False)
        functions = convert_to_tool(
            functions,
            GORILLA_TO_OPENAPI,
            model_style=ModelStyle.OSSMODEL,
            test_category=test_category,
            stringify_parameters=True,
        )

        # Serialize the pre-processed, converted function specs for the prompt.
        functions_str = "\n".join([json.dumps(func) for func in functions])

        prompt = prompt_str.replace("{functions_str}", functions_str).replace(
            "{query}", prompt
        )
        return prompt

    def inference(
        self, question_file, test_category, num_gpus, format_prompt_func=_format_prompt
    ):
        return super().inference(
            question_file, test_category, num_gpus, format_prompt_func
        )

    def decode_ast(self, result, language="Python"):
        decoded_outputs = []
        result = [
            call.strip()
            for call in result.split("<function_call>")
            if len(call.strip()) > 0
        ]

        for res in result:
            try:
                res = json.loads(res.strip())
            except json.JSONDecodeError:
                # Keep unparseable segments as raw strings.
                decoded_outputs.append(res)
            else:
                fnname = res.get("name", "").strip()
                args = res.get("arguments", {})

                if fnname == "no_function":
                    decoded_outputs.append("No function is called")
                    continue

                if language != "Python":
                    args = {k: str(v) for k, v in args.items()}

                decoded_outputs.append({fnname: args})

        return decoded_outputs

    def decode_execute(self, result):
        decoded_outputs = []
        result = [
            call.strip()
            for call in result.split("<function_call>")
            if len(call.strip()) > 0
        ]

        for res in result:
            try:
                res = json.loads(res.strip())
            except json.JSONDecodeError:
                decoded_outputs.append(res)
            else:
                fnname = res.get("name", "").strip()
                args = res.get("arguments", {})

                if fnname == "no_function":
                    decoded_outputs.append("No function is called")
                    continue

                # Render each call as an executable string, e.g. func(a=1, b='x').
                args_str = ",".join(
                    [f"{argname}={repr(argval)}" for argname, argval in args.items()]
                )
                decoded_outputs.append(f"{fnname}({args_str})")

        return decoded_outputs
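
For reference, here is a small standalone sketch (not part of this diff; the sample completion string is invented) of how a `<function_call>`-delimited Granite completion is decoded, mirroring the `decode_ast` and `decode_execute` logic above:

import json

# Invented example completion in the Granite <function_call> format.
raw_output = '<function_call> {"name": "get_weather", "arguments": {"city": "Berlin", "unit": "celsius"}}'

calls = [c.strip() for c in raw_output.split("<function_call>") if c.strip()]
for call in calls:
    parsed = json.loads(call)
    name, args = parsed["name"], parsed["arguments"]
    # decode_ast-style output: {'get_weather': {'city': 'Berlin', 'unit': 'celsius'}}
    print({name: args})
    # decode_execute-style output: get_weather(city='Berlin',unit='celsius')
    args_str = ",".join(f"{k}={repr(v)}" for k, v in args.items())
    print(f"{name}({args_str})")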
@@ -16,6 +16,7 @@
from model_handler.mistral_handler import MistralHandler
from model_handler.nexus_handler import NexusHandler
from model_handler.oss_handler import OSSHandler
from model_handler.granite_handler import GraniteHandler
from model_handler.nvidia_handler import NvidiaHandler
from model_handler.glm_handler import GLMHandler

@@ -76,6 +77,7 @@
"command-r-plus-FC-optimized": CohereHandler,
"command-r-plus-optimized": CohereHandler,
"snowflake/arctic": ArcticHandler,
"ibm-granite/granite-20b-functioncalling": GraniteHandler,
"nvidia/nemotron-4-340b-instruct": NvidiaHandler,
"THUDM/glm-4-9b-chat": GLMHandler
}
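
As a rough illustration (the driver code that consumes this mapping is not shown in this diff, so the snippet below is an assumed usage sketch rather than the actual pipeline), the new entry lets the leaderboard resolve the Granite model name to its handler class:

# Hypothetical usage sketch; the real driver lives elsewhere in the repository.
model_name = "ibm-granite/granite-20b-functioncalling"
handler = handler_map[model_name](model_name)  # GraniteHandler with its default decoding settings
print(handler.decode_ast('<function_call> {"name": "no_function", "arguments": {}}'))
# -> ['No function is called']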