From a9353ecd97820bf40997ea4f46cad0a1cfae7088 Mon Sep 17 00:00:00 2001
From: vincent
Date: Tue, 6 Feb 2024 22:16:20 +0800
Subject: [PATCH] constants expanded for minicpm

---
 llama.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index c61a3e317..802040229 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6828,15 +6828,20 @@ struct llm_build_context {
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
         GGML_ASSERT(n_embd_head == hparams.n_rot);
 
+        const int64_t n_embd = hparams.n_embd;
+        //TODO: if the model varies, these parameters need to be read from the model
+        const int scale_emb = 12;
+        const int dim_model_base = 256;
+        const float scale_depth = 1.4f;
+
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb);
         cb(inpL, "inp_embd", -1);
 
-        // scale_emb - scale the input embeddings
-        float scale_emb = 12.0f;
-        inpL = ggml_scale(ctx0, inpL, scale_emb);
+        // scale the input embeddings
+        inpL = ggml_scale(ctx0, inpL, float(scale_emb));
         cb(inpL, "inp_scaled", -1);
 
         // inp_pos - contains the positions
@@ -6906,7 +6911,7 @@
         }
 
         // scale_res - scale the hidden states for residual connection
-        float scale_res = 0.2217f; // scale_depth/√(num_layers)
+        const float scale_res = scale_depth/sqrtf(float(n_layer));
         cur = ggml_scale(ctx0, cur, scale_res);
         cb(cur, "hidden_scaled", -1);
 
@@ -6948,7 +6953,7 @@
         cb(cur, "result_norm", -1);
 
         // lm_head scaling
-        float scale_lmhead = 1.0f/9.0f; // 1/(dim_model/256)
+        const float scale_lmhead = float(dim_model_base)/float(n_embd);
         cur = ggml_scale(ctx0, cur, scale_lmhead);
         cb(cur, "lmhead_scaling", -1);
 