From b18e5f5a66c953ff3189e68e9a8c0da9d0a3dd8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radek=20Je=C5=BEek?=
Date: Fri, 15 Nov 2024 15:57:20 +0100
Subject: [PATCH] chore(adapters): add llama-3-1-8b to vllm, remove qwen2
 templates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Radek Ježek
---
 src/adapters/bam/chatPreset.ts          | 16 ----------------
 src/adapters/bam/llm.ts                 |  5 +----
 src/adapters/ibm-vllm/chatPreset.ts     | 10 +++++-----
 src/adapters/shared/llmChatTemplates.ts | 19 -------------------
 4 files changed, 6 insertions(+), 44 deletions(-)

diff --git a/src/adapters/bam/chatPreset.ts b/src/adapters/bam/chatPreset.ts
index d0519c26..904094a2 100644
--- a/src/adapters/bam/chatPreset.ts
+++ b/src/adapters/bam/chatPreset.ts
@@ -62,22 +62,6 @@ export const BAMChatLLMPreset = {
       },
     };
   },
-  "qwen/qwen2-72b-instruct": (): BAMChatLLMPreset => {
-    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2");
-
-    return {
-      base: {
-        parameters: {
-          decoding_method: "greedy",
-          include_stop_sequence: false,
-          stop_sequences: [...parameters.stop_sequence],
-        },
-      },
-      chat: {
-        messagesToPrompt: messagesToPrompt(template),
-      },
-    };
-  },
 } as const;
 
 export type BAMChatLLMPresetModel = keyof typeof BAMChatLLMPreset;
diff --git a/src/adapters/bam/llm.ts b/src/adapters/bam/llm.ts
index 0f3d169e..6d5cedb0 100644
--- a/src/adapters/bam/llm.ts
+++ b/src/adapters/bam/llm.ts
@@ -203,10 +203,7 @@ export class BAMLLM extends LLM {
       };
     } catch {
       // TODO: remove once retrieval gets fixed on the API
-      if (
-        this.modelId === "qwen/qwen2-72b-instruct" ||
-        this.modelId === "meta-llama/llama-3-1-70b-instruct"
-      ) {
+      if (this.modelId === "meta-llama/llama-3-1-70b-instruct") {
         return {
           tokenLimit: 131_072,
         };
diff --git a/src/adapters/ibm-vllm/chatPreset.ts b/src/adapters/ibm-vllm/chatPreset.ts
index 99700b8d..6a707003 100644
--- a/src/adapters/ibm-vllm/chatPreset.ts
+++ b/src/adapters/ibm-vllm/chatPreset.ts
@@ -26,7 +26,7 @@ interface IBMVllmChatLLMPreset {
 export const IBMVllmModel = {
   LLAMA_3_1_405B_INSTRUCT_FP8: "meta-llama/llama-3-1-405b-instruct-fp8",
   LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct",
-  QWEN2_72B_INSTRUCT: "qwen/qwen2-72b-instruct",
+  LLAMA_3_1_8B_INSTRUCT: "meta-llama/llama-3-1-8b-instruct",
   GRANITE_INSTRUCT: "ibm/granite-instruct", // Generic model ID is used for ease of development, ground it once stable
 } as const;
 export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel];
@@ -76,17 +76,17 @@
       },
     };
   },
-  [IBMVllmModel.QWEN2_72B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
-    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2");
+  [IBMVllmModel.LLAMA_3_1_8B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
+    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3");
     return {
       base: {
-        modelId: IBMVllmModel.QWEN2_72B_INSTRUCT,
+        modelId: IBMVllmModel.LLAMA_3_1_8B_INSTRUCT,
         parameters: {
           method: "GREEDY",
           stopping: {
             stop_sequences: [...parameters.stop_sequence],
             include_stop_sequence: false,
-            max_new_tokens: 1024,
+            max_new_tokens: 2048,
           },
         },
       },
diff --git a/src/adapters/shared/llmChatTemplates.ts b/src/adapters/shared/llmChatTemplates.ts
index d3341de9..226db509 100644
--- a/src/adapters/shared/llmChatTemplates.ts
+++ b/src/adapters/shared/llmChatTemplates.ts
@@ -114,24 +114,6 @@
   },
 };
 
-const qwen2: LLMChatTemplate = {
-  template: new PromptTemplate({
-    schema: templateSchemaFactory(["system", "user", "assistant"] as const),
-    template: `{{#messages}}{{#system}}<|im_start|>system
-{{system}}<|im_end|>
-{{ end }}{{/system}}{{#user}}<|im_start|>user
-{{user}}<|im_end|>
-{{ end }}{{/user}}{{#assistant}}<|im_start|>assistant
-{{assistant}}<|im_end|>
-{{ end }}{{/assistant}}{{/messages}}<|im_start|>assistant
-`,
-  }),
-  messagesToPrompt: messagesToPromptFactory(),
-  parameters: {
-    stop_sequence: ["<|im_end|>"],
-  },
-};
-
 const granite3Instruct: LLMChatTemplate = {
   template: new PromptTemplate({
     schema: templateSchemaFactory([
@@ -171,7 +153,6 @@ export class LLMChatTemplates {
   protected static readonly registry = {
     "llama3.1": llama31,
     "llama3": llama3,
-    "qwen2": qwen2,
     "granite3Instruct": granite3Instruct,
   };
 