diff --git a/llama.cpp b/llama.cpp
index 802040229..f3c5146d1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6830,8 +6830,8 @@ struct llm_build_context {
 
         const int64_t n_embd = hparams.n_embd;
         //TODO: if the model varies, these parameters need to be read from the model
-        const int scale_emb = 12;
-        const int dim_model_base = 256;
+        const int64_t n_embd_base = 256;
+        const float scale_embd  = 12.0f;
         const float scale_depth = 1.4f;
 
         struct ggml_tensor * cur;
@@ -6841,7 +6841,7 @@ struct llm_build_context {
         cb(inpL, "inp_embd", -1);
 
         // scale the input embeddings
-        inpL = ggml_scale(ctx0, inpL, float(scale_emb));
+        inpL = ggml_scale(ctx0, inpL, scale_embd);
         cb(inpL, "inp_scaled", -1);
 
         // inp_pos - contains the positions
@@ -6953,7 +6953,7 @@ struct llm_build_context {
         cb(cur, "result_norm", -1);
 
         // lm_head scaling
-        const float scale_lmhead = float(dim_model_base)/float(n_embd);
+        const float scale_lmhead = float(n_embd_base)/float(n_embd);
         cur = ggml_scale(ctx0, cur, scale_lmhead);
         cb(cur, "lmhead_scaling", -1);