From 8cbaed1d9a1400576f8424920ca82f1d8c9404cc Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 11 Dec 2023 08:55:16 +0200
Subject: [PATCH] llama : fix hard-coded number of experts

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index e2a01902e8920..b9216f957e8f4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3079,7 +3079,7 @@ static void llm_load_tensors(
                         GGML_ASSERT(hparams.n_expert_used > 0);
 
                         // MoE branch
-                        for (int x = 0; x < 8; ++x) {
+                        for (uint32_t x = 0; x < hparams.n_expert; ++x) {
                             layer.ffn_gate_exp[x] = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_GATE_EXP, "weight", i, x), {n_embd, n_ff}, backend_split);
                             layer.ffn_down_exp[x] = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN_EXP, "weight", i, x), {  n_ff, n_embd}, backend_split);
                             layer.ffn_up_exp[x]   = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP_EXP,   "weight", i, x), {n_embd,   n_ff}, backend_split);