[BFCL] add ibm-granite-20b-functioncalling model #525

Merged · 1 commit · Jul 17, 2024
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/README.md
@@ -194,6 +194,7 @@ Below is *a table of models we support* to run our leaderboard evaluation against
|snowflake/arctic | Prompt|
|nvidia/nemotron-4-340b-instruct| Prompt|
|THUDM/glm-4-9b-chat 💻| Function Calling|
|ibm-granite/granite-20b-functioncalling 💻| Function Calling|

Here {MODEL} 💻 means the model needs to be hosted locally and served via vLLM, while plain {MODEL} means the model is invoked through API calls. A trailing `-FC` indicates that the model supports the function-calling feature. You can check out the table summarizing feature support across different models [here](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#prompt).

@@ -371,14 +371,20 @@
"Nemotron-4-340b-instruct (Prompt)",
"https://huggingface.co/nvidia/nemotron-4-340b-instruct",
"NVIDIA",
"nvidia-open-model-license"
"nvidia-open-model-license",
],
"ibm-granite/granite-20b-functioncalling": [
"Granite-20b-FunctionCalling (FC)",
"https://huggingface.co/ibm-granite/granite-20b-functioncalling",
"IBM",
"Apache-2.0",
],
"THUDM/glm-4-9b-chat": [
"GLM-4-9b-Chat (FC)",
"https://huggingface.co/THUDM/glm-4-9b-chat",
"THUDM",
"glm-4"
]
"glm-4",
],
}

INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -473,7 +479,7 @@
"meta-llama/Meta-Llama-3-8B-Instruct": 73,
"meta-llama/Meta-Llama-3-70B-Instruct": 307,
"gorilla-openfunctions-v2": 83,
"THUDM/glm-4-9b-chat": 223
"THUDM/glm-4-9b-chat": 223,
}


@@ -486,6 +492,7 @@
"meetkai/functionary-small-v2.4-FC",
"snowflake/arctic",
"nvidia/nemotron-4-340b-instruct",
"ibm-granite/granite-20b-functioncalling",
"THUDM/glm-4-9b-chat",
]

@@ -673,7 +680,7 @@ def display_api_status_error(rest_error, executable_error, display_success=False

    RED_FONT = "\033[91m"
    RESET = "\033[0m"

    print(f"\n{RED_FONT}{'-' * 18} Executable Categories' Error Bounds Based on API Health Status {'-' * 18}{RESET}\n")

    if rest_error:
@@ -682,7 +689,7 @@ def display_api_status_error(rest_error, executable_error, display_success=False
        for data, status in rest_error.errors:
            print(f" - Test Case: {data['ground_truth']}")
            print(f" Error Type: {status['error_type']}\n")

    if executable_error:
        print(f"❗️ Warning: Unable to verify health of executable APIs used in executable test categories (Non-REST). Please contact API provider.\n")
        print(f"{executable_error.error_rate} APIs affected:\n")
@@ -691,8 +698,8 @@ def display_api_status_error(rest_error, executable_error, display_success=False
print(f" Error Type: {status['error_type']}\n")

print(f"{RED_FONT}{'-' * 100}\n{RESET}")


def get_executable_expected_output(prompt_file_path):
# Before we run the evaluation, we need to add the "execution_result" field to the prompt file, using the ground truth data.
prompt_content = load_file(prompt_file_path)
3 changes: 2 additions & 1 deletion berkeley-function-call-leaderboard/model_handler/constant.py
@@ -143,7 +143,8 @@
"NousResearch/Hermes-2-Pro-Mistral-7B",
"command-r-plus-FC",
"command-r-plus-FC-optimized",
"THUDM/glm-4-9b-chat"
"THUDM/glm-4-9b-chat",
"ibm-granite/granite-20b-functioncalling",
]

TEST_CATEGORIES = {
113 changes: 113 additions & 0 deletions berkeley-function-call-leaderboard/model_handler/granite_handler.py
@@ -0,0 +1,113 @@
import json

from model_handler.model_style import ModelStyle
from model_handler.oss_handler import OSSHandler
from model_handler.constant import GORILLA_TO_OPENAPI
from model_handler.utils import (
    language_specific_pre_processing,
    convert_to_tool,
    augment_prompt_by_languge,
)


class GraniteHandler(OSSHandler):
    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
        # Decode near-greedily regardless of the requested temperature.
        temperature = 0.001
        super().__init__(model_name, temperature, top_p, max_tokens)

    # Used as a plain function (no `self`); it is the default `format_prompt_func`
    # passed to `OSSHandler.inference` below.
    def _format_prompt(prompt, function, test_category):
        prompt_str = (
            "SYSTEM: You are a helpful assistant with access to the following function calls. "
            "Your task is to produce a sequence of function calls necessary to generate response to the user utterance. "
            "Use the following function calls as required."
            "\n<|function_call_library|>\n{functions_str}\n"
            'If none of the functions are relevant or the given question lacks the parameters required by the function, please output "<function_call> {"name": "no_function", "arguments": {}}".\n\n'
            "USER: {query}\nASSISTANT: "
        )

        # Remove the language-specific prompt augmentation string, such as
        # "Note that the provided function is in Python".
        language_specific_prompt_augmented_str = augment_prompt_by_languge(
            "", test_category
        )
        if language_specific_prompt_augmented_str.strip():
            prompt = prompt.replace(language_specific_prompt_augmented_str, "")

        functions = language_specific_pre_processing(function, test_category, False)
        functions = convert_to_tool(
            functions,
            GORILLA_TO_OPENAPI,
            model_style=ModelStyle.OSSMODEL,
            test_category=test_category,
            stringify_parameters=True,
        )

        # Serialize the pre-processed, converted function specs for the prompt.
        functions_str = "\n".join([json.dumps(func) for func in functions])

        prompt = prompt_str.replace("{functions_str}", functions_str).replace(
            "{query}", prompt
        )
        return prompt

    def inference(
        self, question_file, test_category, num_gpus, format_prompt_func=_format_prompt
    ):
        return super().inference(
            question_file, test_category, num_gpus, format_prompt_func
        )

    def decode_ast(self, result, language="Python"):
        decoded_outputs = []
        result = [
            call.strip()
            for call in result.split("<function_call>")
            if len(call.strip()) > 0
        ]

        for res in result:
            try:
                res = json.loads(res.strip())
            except json.JSONDecodeError:
                # Keep unparseable segments as raw strings.
                decoded_outputs.append(res)
            else:
                fnname = res.get("name", "").strip()
                args = res.get("arguments", {})

                if fnname == "no_function":
                    decoded_outputs.append("No function is called")
                    continue

                if language != "Python":
                    args = {k: str(v) for k, v in args.items()}

                decoded_outputs.append({fnname: args})

        return decoded_outputs

    def decode_execute(self, result):
        decoded_outputs = []
        result = [
            call.strip()
            for call in result.split("<function_call>")
            if len(call.strip()) > 0
        ]

        for res in result:
            try:
                res = json.loads(res.strip())
            except json.JSONDecodeError:
                decoded_outputs.append(res)
            else:
                fnname = res.get("name", "").strip()
                args = res.get("arguments", {})

                if fnname == "no_function":
                    decoded_outputs.append("No function is called")
                    continue

                # Render each call as an executable string, e.g. func(a=1, b='x').
                args_str = ",".join(
                    [f"{argname}={repr(argval)}" for argname, argval in args.items()]
                )
                decoded_outputs.append(f"{fnname}({args_str})")

        return decoded_outputs
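
For reference, here is a small standalone sketch (not part of this diff; the sample completion string is invented) of how a `<function_call>`-delimited Granite completion is decoded, mirroring the `decode_ast` and `decode_execute` logic above:

import json

# Invented example completion in the Granite <function_call> format.
raw_output = '<function_call> {"name": "get_weather", "arguments": {"city": "Berlin", "unit": "celsius"}}'

calls = [c.strip() for c in raw_output.split("<function_call>") if c.strip()]
for call in calls:
    parsed = json.loads(call)
    name, args = parsed["name"], parsed["arguments"]
    # decode_ast-style output: {'get_weather': {'city': 'Berlin', 'unit': 'celsius'}}
    print({name: args})
    # decode_execute-style output: get_weather(city='Berlin',unit='celsius')
    args_str = ",".join(f"{k}={repr(v)}" for k, v in args.items())
    print(f"{name}({args_str})")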
@@ -16,6 +16,7 @@
from model_handler.mistral_handler import MistralHandler
from model_handler.nexus_handler import NexusHandler
from model_handler.oss_handler import OSSHandler
from model_handler.granite_handler import GraniteHandler
from model_handler.nvidia_handler import NvidiaHandler
from model_handler.glm_handler import GLMHandler

@@ -76,6 +77,7 @@
"command-r-plus-FC-optimized": CohereHandler,
"command-r-plus-optimized": CohereHandler,
"snowflake/arctic": ArcticHandler,
"ibm-granite/granite-20b-functioncalling": GraniteHandler,
"nvidia/nemotron-4-340b-instruct": NvidiaHandler,
"THUDM/glm-4-9b-chat": GLMHandler
}
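
As a rough illustration (the driver code that consumes this mapping is not shown in this diff, so the snippet below is an assumed usage sketch rather than the actual pipeline), the new entry lets the leaderboard resolve the Granite model name to its handler class:

# Hypothetical usage sketch; the real driver lives elsewhere in the repository.
model_name = "ibm-granite/granite-20b-functioncalling"
handler = handler_map[model_name](model_name)  # GraniteHandler with its default decoding settings
print(handler.decode_ast('<function_call> {"name": "no_function", "arguments": {}}'))
# -> ['No function is called']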