use normal glm4 chat template & use LLM_FFN_SWIGLU in phi3

This commit is contained in:
toyer 2024-07-03 08:57:03 +00:00
parent bf54db218e
commit bce74d8212
2 changed files with 6 additions and 14 deletions

View file

@ -3209,7 +3209,6 @@ class ChatGLMModel(Model):
self.gguf_writer.add_token_types(toktypes) self.gguf_writer.add_token_types(toktypes)
special_vocab = gguf.SpecialVocab(dir_model, load_merges=False) special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
special_vocab.chat_template = "chatglm4"
special_vocab.merges = merges special_vocab.merges = merges
# only add special tokens when they were not already loaded from config.json # only add special tokens when they were not already loaded from config.json
special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"]) special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])

View file

@ -10326,19 +10326,12 @@ struct llm_build_context {
// special-case: the up and gate tensors are merged into a single tensor // special-case: the up and gate tensors are merged into a single tensor
// TOOD: support into llm_build_ffn // TOOD: support into llm_build_ffn
{ {
struct ggml_tensor* up = ggml_mul_mat(ctx0, model.layers[il].ffn_up, cur); cur = llm_build_ffn(ctx0, cur,
cb(up, "ffn_up", il); model.layers[il].ffn_up, NULL, NULL,
NULL, NULL, NULL,
auto g = ggml_cont(ctx0, ggml_view_2d(ctx0, up, up->ne[0] / 2, up->ne[1], ggml_row_size(up->type, up->ne[0]), 0)); model.layers[il].ffn_down, NULL, NULL,
auto y = ggml_cont(ctx0, ggml_view_2d(ctx0, up, up->ne[0] / 2, up->ne[1], ggml_row_size(up->type, up->ne[0]), up->nb[1] / 2)); NULL,
LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
y = ggml_mul(ctx0, y, ggml_silu(ctx0, g));
cb(y, "ffn_gate", il);
auto down = ggml_mul_mat(ctx0, model.layers[il].ffn_down, y);
cb(down, "ffn_down", il);
cur = down;
cb(cur, "ffn_out", il); cb(cur, "ffn_out", il);
} }