feat(adapters): add llama-3-1-8b to vllm, remove qwen2 templates (#172)
Signed-off-by: Radek Ježek <radek.jezek@ibm.com>
jezekra1 authored Nov 15, 2024
1 parent 76eda7a commit b333594
Showing 4 changed files with 6 additions and 44 deletions.
src/adapters/bam/chatPreset.ts (16 changes: 0 additions & 16 deletions)

@@ -62,22 +62,6 @@ export const BAMChatLLMPreset = {
       },
     };
   },
-  "qwen/qwen2-72b-instruct": (): BAMChatLLMPreset => {
-    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2");
-
-    return {
-      base: {
-        parameters: {
-          decoding_method: "greedy",
-          include_stop_sequence: false,
-          stop_sequences: [...parameters.stop_sequence],
-        },
-      },
-      chat: {
-        messagesToPrompt: messagesToPrompt(template),
-      },
-    };
-  },
 } as const;
 
 export type BAMChatLLMPresetModel = keyof typeof BAMChatLLMPreset;
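For context on what this deletion changes downstream, here is a minimal, self-contained sketch of the `as const` / `keyof typeof` idiom the preset map relies on (identifiers are illustrative, not copied from the repo): dropping an entry from the object also drops it from the exported model-ID union, so stale callers fail at compile time rather than at runtime.

```ts
// Illustrative sketch only, not the actual chatPreset.ts.
// The preset map's keys double as the model-ID union type.
const ChatPreset = {
  "meta-llama/llama-3-1-70b-instruct": () => ({
    parameters: { decoding_method: "greedy" as const },
  }),
} as const;

type ChatPresetModel = keyof typeof ChatPreset;

const preset = ChatPreset["meta-llama/llama-3-1-70b-instruct"]();
// After this commit, the analogous lookup for "qwen/qwen2-72b-instruct"
// no longer type-checks, because that key was removed from the map.
```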
src/adapters/bam/llm.ts (5 changes: 1 addition & 4 deletions)

@@ -203,10 +203,7 @@ export class BAMLLM extends LLM<BAMLLMOutput, BAMLLMGenerateOptions> {
       };
     } catch {
       // TODO: remove once retrieval gets fixed on the API
-      if (
-        this.modelId === "qwen/qwen2-72b-instruct" ||
-        this.modelId === "meta-llama/llama-3-1-70b-instruct"
-      ) {
+      if (this.modelId === "meta-llama/llama-3-1-70b-instruct") {
         return {
           tokenLimit: 131_072,
         };
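The surviving branch is a temporary workaround: when the token-limit lookup against the API throws, the adapter falls back to a hard-coded 131,072-token context window for llama-3-1-70b. A simplified sketch of that pattern follows; the helper name, `fetchMeta` parameter, and error handling here are hypothetical, since the diff shows only the fallback branch itself.

```ts
// Simplified sketch of the fallback in llm.ts; fetchMeta stands in for the
// real metadata call and is a hypothetical parameter.
async function resolveTokenLimit(
  modelId: string,
  fetchMeta: () => Promise<{ tokenLimit: number }>,
): Promise<{ tokenLimit: number }> {
  try {
    return await fetchMeta();
  } catch {
    // TODO (from the source): remove once retrieval gets fixed on the API
    if (modelId === "meta-llama/llama-3-1-70b-instruct") {
      return { tokenLimit: 131_072 }; // Llama 3.1's 128k context window
    }
    throw new Error(`Unable to resolve token limit for ${modelId}`); // hypothetical handling
  }
}
```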
src/adapters/ibm-vllm/chatPreset.ts (10 changes: 5 additions & 5 deletions)

@@ -26,7 +26,7 @@ interface IBMVllmChatLLMPreset {
 export const IBMVllmModel = {
   LLAMA_3_1_405B_INSTRUCT_FP8: "meta-llama/llama-3-1-405b-instruct-fp8",
   LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct",
-  QWEN2_72B_INSTRUCT: "qwen/qwen2-72b-instruct",
+  LLAMA_3_1_8B_INSTRUCT: "meta-llama/llama-3-1-8b-instruct",
   GRANITE_INSTRUCT: "ibm/granite-instruct", // Generic model ID is used for ease of development, ground it once stable
 } as const;
 export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel];

@@ -76,17 +76,17 @@ export const IBMVllmChatLLMPreset = {
       },
     };
   },
-  [IBMVllmModel.QWEN2_72B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
-    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2");
+  [IBMVllmModel.LLAMA_3_1_8B_INSTRUCT]: (): IBMVllmChatLLMPreset => {
+    const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3");
     return {
       base: {
-        modelId: IBMVllmModel.QWEN2_72B_INSTRUCT,
+        modelId: IBMVllmModel.LLAMA_3_1_8B_INSTRUCT,
         parameters: {
           method: "GREEDY",
           stopping: {
             stop_sequences: [...parameters.stop_sequence],
             include_stop_sequence: false,
-            max_new_tokens: 1024,
+            max_new_tokens: 2048,
           },
         },
       },
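A usage sketch of the new preset, assuming the exports shown above; the relative import path is an assumption and may differ in your setup.

```ts
import { IBMVllmChatLLMPreset, IBMVllmModel } from "./src/adapters/ibm-vllm/chatPreset"; // path assumed

// The 8B preset reuses the shared "llama3" chat template; note it also
// doubles max_new_tokens (1024 -> 2048) relative to the removed qwen2 entry.
const preset = IBMVllmChatLLMPreset[IBMVllmModel.LLAMA_3_1_8B_INSTRUCT]();
console.log(preset.base.modelId); // "meta-llama/llama-3-1-8b-instruct"
console.log(preset.base.parameters.stopping.max_new_tokens); // 2048
```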
src/adapters/shared/llmChatTemplates.ts (19 changes: 0 additions & 19 deletions)

@@ -114,24 +114,6 @@ const llama3: LLMChatTemplate = {
   },
 };
 
-const qwen2: LLMChatTemplate = {
-  template: new PromptTemplate({
-    schema: templateSchemaFactory(["system", "user", "assistant"] as const),
-    template: `{{#messages}}{{#system}}<|im_start|>system
-{{system}}<|im_end|>
-{{ end }}{{/system}}{{#user}}<|im_start|>user
-{{user}}<|im_end|>
-{{ end }}{{/user}}{{#assistant}}<|im_start|>assistant
-{{assistant}}<|im_end|>
-{{ end }}{{/assistant}}{{/messages}}<|im_start|>assistant
-`,
-  }),
-  messagesToPrompt: messagesToPromptFactory(),
-  parameters: {
-    stop_sequence: ["<|im_end|>"],
-  },
-};
-
 const granite3Instruct: LLMChatTemplate = {
   template: new PromptTemplate({
     schema: templateSchemaFactory([

@@ -171,7 +153,6 @@ export class LLMChatTemplates {
   protected static readonly registry = {
     "llama3.1": llama31,
     "llama3": llama3,
-    "qwen2": qwen2,
     "granite3Instruct": granite3Instruct,
   };
 
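After this change the shared registry resolves only "llama3.1", "llama3", and "granite3Instruct", so any caller still requesting "qwen2" must migrate. A sketch of the lookup, assuming the exports shown above (the import path and the error behavior for unknown keys are assumptions; the diff does not show them):

```ts
import { LLMChatTemplates } from "./src/adapters/shared/llmChatTemplates"; // path assumed

// "qwen2" is no longer a registered template key after this commit.
const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3");
const render = messagesToPrompt(template); // builds a messages -> prompt-string converter
console.log(parameters.stop_sequence); // the template's stop sequences
```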
