From 9a65c7a2732bc90e0e69839a8e238fb5749c7ac3 Mon Sep 17 00:00:00 2001
From: Joan Martinez
Date: Fri, 31 May 2024 15:10:43 +0200
Subject: [PATCH] fix: fix the usage of the code model

---
 convert-hf-to-gguf.py | 4 ++--
 llama.cpp             | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 2ece07f81..c04cd8dff 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -2442,10 +2442,10 @@ class JinaBertV2Model(BertModel):
             if 'gated_layer' in name:
                 d1 = data[:self.intermediate_size, :]
                 name1 = name.replace('gated_layers', 'gated_layers_w')
-                name1 = name.replace('up_gated_layer', 'gated_layers_w')
+                name1 = name.replace('up_gated_layer', 'gated_layers_v')
                 d2 = data[self.intermediate_size:, :]
                 name2 = name.replace('gated_layers', 'gated_layers_v')
-                name2 = name.replace('up_gated_layer', 'gated_layers_v')
+                name2 = name.replace('up_gated_layer', 'gated_layers_w')
                 yield name1, d1
                 yield name2, d2
                 continue
diff --git a/llama.cpp b/llama.cpp
index 4662f1fdd..bc81963d0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -5498,7 +5498,7 @@ static bool llm_load_tensors(
                         layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd});
                     } else {
-                        layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
+                        layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
                     }

                     layer.layer_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd});
@@ -8506,6 +8506,8 @@ struct llm_build_context {
                 cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);

                 if (model.layers[il].attn_norm_2 != nullptr) {
+                    // re-add the layer input
+                    cur = ggml_add(ctx0, cur, inpL);
                     cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm_2, model.layers[il].attn_norm_2_b, LLM_NORM, cb, il);
                 }
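
Note: a minimal sketch of what the converter hunk above ends up doing for a
checkpoint that stores the feed-forward up and gate projections stacked in a
single 'up_gated_layer' weight. The tensor name and shapes below are made up
for illustration, and numpy stands in for the tensors the real script handles;
only the split/rename logic mirrors the patch.

    import numpy as np

    intermediate_size = 4
    name = "encoder.layer.0.mlp.up_gated_layer.weight"  # hypothetical name
    data = np.ones((2 * intermediate_size, 3))          # two stacked halves

    # after this patch: first half -> 'gated_layers_v',
    # second half -> 'gated_layers_w' (the reverse of the old pairing)
    d1 = data[:intermediate_size, :]
    name1 = name.replace("up_gated_layer", "gated_layers_v")
    d2 = data[intermediate_size:, :]
    name2 = name.replace("up_gated_layer", "gated_layers_w")

    print(name1, d1.shape)  # encoder.layer.0.mlp.gated_layers_v.weight (4, 3)
    print(name2, d2.shape)  # encoder.layer.0.mlp.gated_layers_w.weight (4, 3)

In other words, the two halves of 'up_gated_layer' were previously written out
under each other's GGUF names; this commit swaps them, which is what the
Subject refers to as fixing the usage of the code model.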