From a9353ecd97820bf40997ea4f46cad0a1cfae7088 Mon Sep 17 00:00:00 2001
From: vincent
Date: Tue, 6 Feb 2024 22:16:20 +0800
Subject: [PATCH] constants expanded for minicpm

---
 llama.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index c61a3e317..802040229 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6828,15 +6828,20 @@ struct llm_build_context {
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
         GGML_ASSERT(n_embd_head == hparams.n_rot);
 
+        const int64_t n_embd = hparams.n_embd;
+        //TODO: if the model varies, these parameters need to be read from the model
+        const int scale_emb = 12;
+        const int dim_model_base = 256;
+        const float scale_depth = 1.4f;
+
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
         cb(inpL, "inp_embd", -1);
 
-        // scale_emb - scale the input embeddings
-        float scale_emb = 12.0f;
-        inpL = ggml_scale(ctx0, inpL, scale_emb);
+        // scale the input embeddings
+        inpL = ggml_scale(ctx0, inpL, float(scale_emb));
         cb(inpL, "inp_scaled", -1);
 
         // inp_pos - contains the positions
@@ -6906,7 +6911,7 @@
         }
 
         // scale_res - scale the hidden states for residual connection
-        float scale_res = 0.2217f; // scale_depth/√(num_layers)
+        const float scale_res = scale_depth/sqrtf(float(n_layer));
         cur = ggml_scale(ctx0, cur, scale_res);
         cb(cur, "hidden_scaled", -1);
 
@@ -6948,7 +6953,7 @@
         cb(cur, "result_norm", -1);
 
         // lm_head scaling
-        float scale_lmhead = 1.0f/9.0f; // 1/(dim_model/256)
+        const float scale_lmhead = float(dim_model_base)/float(n_embd);
         cur = ggml_scale(ctx0, cur, scale_lmhead);
         cb(cur, "lmhead_scaling", -1);
 