From a71041a05fbe40fc55db981b645ac7cf17e30081 Mon Sep 17 00:00:00 2001 From: Galunid Date: Sun, 22 Oct 2023 09:57:52 +0200 Subject: [PATCH] Use ggml_norm not ggml_rms_norm --- llama.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/llama.cpp b/llama.cpp index 149768e9d..4a7daffea 100644 --- a/llama.cpp +++ b/llama.cpp @@ -5797,7 +5797,7 @@ static struct ggml_cgraph * llm_build_stablelm( const float freq_base = cparams.rope_freq_base; const float freq_scale = cparams.rope_freq_scale; - const float norm_rms_eps = hparams.f_norm_rms_eps; + const float norm_eps = hparams.f_norm_eps; const int n_gpu_layers = model.n_gpu_layers; @@ -5953,9 +5953,9 @@ static struct ggml_cgraph * llm_build_stablelm( // norm { - cur = ggml_rms_norm(ctx0, inpL, norm_rms_eps); + cur = ggml_norm(ctx0, inpL, norm_eps); offload_func(cur); - ggml_set_name(cur, "rms_norm_0"); + ggml_set_name(cur, "norm_0"); // cur = cur*attn_norm(broadcasted) cur = ggml_mul(ctx0, cur, model.layers[il].attn_norm); @@ -6096,9 +6096,9 @@ static struct ggml_cgraph * llm_build_stablelm( { // norm { - cur = ggml_rms_norm(ctx0, inpFF, norm_rms_eps); + cur = ggml_norm(ctx0, inpFF, norm_eps); offload_func(cur); - ggml_set_name(cur, "rms_norm_1"); + ggml_set_name(cur, "norm_1"); // cur = cur*ffn_norm(broadcasted) cur = ggml_mul(ctx0, cur, model.layers[il].ffn_norm); @@ -6152,14 +6152,19 @@ static struct ggml_cgraph * llm_build_stablelm( // norm { - cur = ggml_rms_norm(ctx0, cur, norm_rms_eps); + cur = ggml_norm(ctx0, cur, norm_eps); offload_func_nr(cur); - ggml_set_name(cur, "rms_norm_2"); + ggml_set_name(cur, "norm_2"); // cur = cur*norm(broadcasted) cur = ggml_mul(ctx0, cur, model.output_norm); // offload_func_nr(cur); // TODO CPU + GPU mirrored backend + // ggml_set_name(cur, "result_norm"); + + cur = ggml_add(ctx0, cur, model.output_norm_b); + offload_func_nr(cur); ggml_set_name(cur, "result_norm"); + } // lm_head