From e92c74f4a1b88a0065b7fe1faabbc4ccfa579a47 Mon Sep 17 00:00:00 2001
From: Layl Bongers <3094382+LaylBongers@users.noreply.github.com>
Date: Mon, 15 Apr 2024 12:05:47 +0200
Subject: [PATCH] Fix model loading

---
 src/llama.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index 50a7d5ff3..195abba77 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -1345,6 +1345,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
         LLM_ARCH_RWKV,
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_ATTN_NORM,  "blk.%d.attn_norm" },
         },
     },
     {
@@ -8226,6 +8227,16 @@ static bool llm_load_tensors(
         case LLM_ARCH_RWKV:
             {
                 model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+
+                for (int i = 0; i < n_layer; ++i) {
+                    ggml_context * ctx_layer = ctx_for_layer(i);
+
+                    auto & layer = model.layers[i];
+
+                    layer.attn_norm   = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
+                    layer.attn_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "bias", i),   {n_embd});
+                }
+
             } break;
         default:
             throw std::runtime_error("unknown architecture");
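
Note on the name resolution above: the tn() helper substitutes the layer index into the "blk.%d.attn_norm" template registered in the first hunk and appends the ".weight" / ".bias" suffix before the loader looks the tensor up in the GGUF file. A minimal sketch of that expansion follows; format_tensor_name() is a hypothetical stand-in for illustration, not llama.cpp's actual tn() implementation.

    // Illustrative sketch only: format_tensor_name() is a hypothetical
    // stand-in for the tn() helper used in the patch above.
    #include <cstdio>
    #include <string>

    static std::string format_tensor_name(const char * tmpl, const char * suffix, int layer) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), tmpl, layer); // "blk.%d.attn_norm" -> "blk.0.attn_norm"
        return std::string(buf) + "." + suffix;       // append ".weight" or ".bias"
    }

    int main() {
        // With the mapping added in the first hunk, layer 0's norm tensors resolve to:
        std::printf("%s\n", format_tensor_name("blk.%d.attn_norm", "weight", 0).c_str()); // blk.0.attn_norm.weight
        std::printf("%s\n", format_tensor_name("blk.%d.attn_norm", "bias",   0).c_str()); // blk.0.attn_norm.bias
    }

The bias tensor is loaded alongside the weight because RWKV's per-block pre-norm is a full LayerNorm (scale and shift), unlike the bias-free RMSNorm used by most other architectures in llama.cpp.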