From c7b9a2e85ee8881369aa3330a16945e7bb511b00 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Tue, 9 Apr 2024 00:58:50 +0200
Subject: [PATCH] llama: dbrx: fix ggml context of the attention outputs weight

---
 llama.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 8db50e0fd..a67149c5f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4690,7 +4690,7 @@ static bool llm_load_tensors(
                     layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
 
                     layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa});
-                    layer.wo = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
+                    layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
 
                     layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd});
 
@@ -4698,7 +4698,6 @@ static bool llm_load_tensors(
                     layer.ffn_gate_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff, n_expert});
                     layer.ffn_down_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff, n_embd, n_expert});
                     layer.ffn_up_exps   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_EXPS,   "weight", i), {n_embd, n_ff, n_expert});
-
                 }
             } break;
         case LLM_ARCH_BAICHUAN: