From bce74d8212b5548409f6ef048b78cc8b6a06d92b Mon Sep 17 00:00:00 2001
From: toyer <2042519524@qq.com>
Date: Wed, 3 Jul 2024 08:57:03 +0000
Subject: [PATCH] use normal glm4 chat template & use LLM_FFN_SWIGLU in phi3

---
 convert-hf-to-gguf.py |  1 -
 src/llama.cpp         | 19 ++++++-------------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 156030071..8c0fa5d8e 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -3209,7 +3209,6 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_token_types(toktypes)
 
         special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
-        special_vocab.chat_template = "chatglm4"
         special_vocab.merges = merges
         # only add special tokens when they were not already loaded from config.json
         special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
diff --git a/src/llama.cpp b/src/llama.cpp
index b6b91c332..eb1bde269 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -10326,19 +10326,12 @@ struct llm_build_context {
             // special-case: the up and gate tensors are merged into a single tensor
             // TOOD: support into llm_build_ffn
             {
-                struct ggml_tensor* up = ggml_mul_mat(ctx0, model.layers[il].ffn_up, cur);
-                cb(up, "ffn_up", il);
-
-                auto g = ggml_cont(ctx0, ggml_view_2d(ctx0, up, up->ne[0] / 2, up->ne[1], ggml_row_size(up->type, up->ne[0]), 0));
-                auto y = ggml_cont(ctx0, ggml_view_2d(ctx0, up, up->ne[0] / 2, up->ne[1], ggml_row_size(up->type, up->ne[0]), up->nb[1] / 2));
-
-                y = ggml_mul(ctx0, y, ggml_silu(ctx0, g));
-                cb(y, "ffn_gate", il);
-
-                auto down = ggml_mul_mat(ctx0, model.layers[il].ffn_down, y);
-                cb(down, "ffn_down", il);
-
-                cur = down;
+                cur = llm_build_ffn(ctx0, cur,
+                        model.layers[il].ffn_up,   NULL, NULL,
+                        NULL,                      NULL, NULL,
+                        model.layers[il].ffn_down, NULL, NULL,
+                        NULL,
+                        LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
                 cb(cur, "ffn_out", il);
             }
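
Note for reviewers: below is a minimal sketch of the SwiGLU computation that
the removed block implemented by hand via ggml_view_2d, and that the
LLM_FFN_SWIGLU path replaces. Each row of the merged ffn_up output holds the
gate half first and the up half second, and the activation is
silu(gate) * up. The sketch uses plain C++ over std::vector rather than ggml
tensors; the helper names silu and swiglu_merged are illustrative only, not
llama.cpp API.

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // silu(x) = x * sigmoid(x), elementwise (what ggml_silu computes)
    static float silu(float x) { return x / (1.0f + std::exp(-x)); }

    // Split a merged row of width 2*n into gate (first half, offset 0) and
    // up (second half, offset of half a row), matching the two ggml_view_2d
    // calls in the removed code, then return silu(gate) * up.
    static std::vector<float> swiglu_merged(const std::vector<float> & row) {
        const size_t n = row.size() / 2;
        std::vector<float> out(n);
        for (size_t i = 0; i < n; ++i) {
            out[i] = silu(row[i]) * row[n + i];
        }
        return out;
    }

    int main() {
        // one merged row [gate | up] with n = 2
        const std::vector<float> merged = {1.0f, -2.0f, 0.5f, 3.0f};
        for (const float v : swiglu_merged(merged)) {
            std::printf("%f\n", v);
        }
        return 0;
    }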