i-am-bee · Tomas2D · Nov 15, 2024 · Nov 15, 2024
@@ -62,22 +62,6 @@ export const BAMChatLLMPreset = {
       },
     };
   },
-  "qwen/qwen2-72b-instruct": (): BAMChatLLMPreset => {
-    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2");
-
-    return {
-      base: {
-        parameters: {
-          decoding_method: "greedy",
-          include_stop_sequence: false,
-          stop_sequences: [...parameters.stop_sequence],
-        },
-      },
-      chat: {
-        messagesToPrompt: messagesToPrompt(template),
-      },
-    };
-  },
 } as const;
 
 export type BAMChatLLMPresetModel = keyof typeof BAMChatLLMPreset;
@@ -203,10 +203,7 @@ export class BAMLLM extends LLM<BAMLLMOutput, BAMLLMGenerateOptions> {
       };
     } catch {
       // TODO: remove once retrieval gets fixed on the API
-      if (
-        this.modelId === "qwen/qwen2-72b-instruct" ||
-        this.modelId === "meta-llama/llama-3-1-70b-instruct"
-      ) {
+      if (this.modelId === "meta-llama/llama-3-1-70b-instruct") {
         return {
           tokenLimit: 131_072,
         };

@@ -26,7 +26,7 @@ interface IBMVllmChatLLMPreset {
 export const IBMVllmModel = {
   LLAMA_3_1_405B_INSTRUCT_FP8: "meta-llama/llama-3-1-405b-instruct-fp8",
   LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct",
-  QWEN2_72B_INSTRUCT: "qwen/qwen2-72b-instruct",
+  LLAMA_3_1_8B_INSTRUCT: "meta-llama/llama-3-1-8b-instruct", // used as the computed key and base.modelId of its IBMVllmChatLLMPreset entry
   GRANITE_INSTRUCT: "ibm/granite-instruct", // Generic model ID is used for ease of development, ground it once stable
 } as const;
 export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel];
@@ -76,17 +76,17 @@ export const IBMVllmChatLLMPreset = {
       },
     };
   },
-  [IBMVllmModel.QWEN2_72B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
-    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2");
+  [IBMVllmModel.LLAMA_3_1_8B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
+    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3");
     return {
       base: {
-        modelId: IBMVllmModel.QWEN2_72B_INSTRUCT,
+        modelId: IBMVllmModel.LLAMA_3_1_8B_INSTRUCT,
         parameters: {
           method: "GREEDY",
           stopping: {
             stop_sequences: [...parameters.stop_sequence],
             include_stop_sequence: false,
-            max_new_tokens: 1024,
+            max_new_tokens: 2048,
           },
         },
       },

@@ -114,24 +114,6 @@ const llama3: LLMChatTemplate = {
   },
 };
 
-const qwen2: LLMChatTemplate = {
-  template: new PromptTemplate({
-    schema: templateSchemaFactory(["system", "user", "assistant"] as const),
-    template: `{{#messages}}{{#system}}<|im_start|>system
-{{system}}<|im_end|>
-{{ end }}{{/system}}{{#user}}<|im_start|>user
-{{user}}<|im_end|>
-{{ end }}{{/user}}{{#assistant}}<|im_start|>assistant
-{{assistant}}<|im_end|>
-{{ end }}{{/assistant}}{{/messages}}<|im_start|>assistant
-`,
-  }),
-  messagesToPrompt: messagesToPromptFactory(),
-  parameters: {
-    stop_sequence: ["<|im_end|>"],
-  },
-};
-
 const granite3Instruct: LLMChatTemplate = {
   template: new PromptTemplate({
     schema: templateSchemaFactory([
@@ -171,7 +153,6 @@ export class LLMChatTemplates {
   protected static readonly registry = {
     "llama3.1": llama31,
     "llama3": llama3,
-    "qwen2": qwen2,
     "granite3Instruct": granite3Instruct,
   };