llama: dbrx: fix ggml context of the attention outputs weight

2024-04-09 00:58:50 +02:00 · 2024-04-09 00:58:50 +02:00 · c7b9a2e85e
commit c7b9a2e85e
parent 55943a281f
1 changed files with 1 additions and 2 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -4690,7 +4690,7 @@ static bool llm_load_tensors(
                    layer.attn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM,  "weight", i), {n_embd});

                    layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa});
-                    layer.wo   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
+                    layer.wo   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});

                    layer.attn_out_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd});

@ -4698,7 +4698,6 @@ static bool llm_load_tensors(
                    layer.ffn_gate_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_EXPS,"weight", i), {n_embd, n_ff,   n_expert});
                    layer.ffn_down_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_EXPS,"weight", i), {n_ff,   n_embd, n_expert});
                    layer.ffn_up_exps   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_EXPS,  "weight", i), {n_embd, n_ff,   n_expert});
-
                }
            } break;
            case LLM_ARCH_BAICHUAN: