From 9174699f84e5ee1d5d10cf924101cdcf21f50070 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 27 Dec 2023 17:24:34 +0200
Subject: [PATCH] llama : adapt plamo to new ffn

ggml-ci
---
 llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 09d2ec06b..bf1b01a90 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3512,7 +3512,7 @@ static bool llm_load_tensors(
                         layer.ffn_down = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {  n_ff, n_embd}, backend_split);
                         layer.ffn_up   = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd,   n_ff}, backend_split);
-
+                        // AWQ ScaleActivation layer
                         layer.ffn_act = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_ACT, "scales", i), {n_ff}, backend, false);
                     }
@@ -5724,6 +5724,7 @@ struct llm_build_context {
                         model.layers[il].ffn_up,   NULL,
                         model.layers[il].ffn_gate, NULL,
                         model.layers[il].ffn_down, NULL,
+                        NULL,
                         LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
                 cb(cur, "ffn_out", il);
             }
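
Note: the extra NULL added to the plamo graph fills the slot for the activation-scales
tensor (the AWQ ScaleActivation layer.ffn_act loaded in the first hunk) that the reworked
llm_build_ffn now accepts; architectures without AWQ scales simply pass NULL there. The
stand-alone C++ sketch below illustrates that optional-scales pattern only; the names
build_ffn and act_scales here are illustrative and not the actual llama.cpp signature.

#include <cstdio>
#include <cstddef>
#include <vector>

// Illustrative stand-in for an FFN builder that takes an optional
// per-channel activation-scales vector (the role played by the AWQ
// ScaleActivation tensor in the patch). A null pointer means "no scales",
// which is what the added NULL argument expresses for plamo.
static std::vector<float> build_ffn(const std::vector<float> & x,
                                    const std::vector<float> * act_scales) {
    std::vector<float> out(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        float v = x[i] > 0.0f ? x[i] : 0.0f; // placeholder activation
        if (act_scales != nullptr) {
            v *= (*act_scales)[i];           // apply AWQ-style scale when present
        }
        out[i] = v;
    }
    return out;
}

int main() {
    const std::vector<float> x      = {-1.0f, 0.5f, 2.0f};
    const std::vector<float> scales = { 1.0f, 2.0f, 0.5f};

    // No AWQ scales: pass nullptr, as the plamo call site now does.
    const std::vector<float> plain  = build_ffn(x, nullptr);
    // With AWQ scales: pass the per-channel scales (cf. "ffn_act.scales").
    const std::vector<float> scaled = build_ffn(x, &scales);

    std::printf("plain[2]=%.2f scaled[2]=%.2f\n", plain[2], scaled[2]);
    return 0;
}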