diff --git a/llama.cpp b/llama.cpp
index cb769ec66..02146bdd5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3812,7 +3812,10 @@ static bool llm_load_tensors(
                     model.output = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, backend_output);
                 }
 
-                const uint32_t n_ff = hparams.n_ff;
+                const uint32_t n_ff = hparams.n_ff;
+                const int64_t n_embd_gqa = hparams.n_value_gqa();
+                GGML_ASSERT(n_embd_gqa == n_embd);
+                GGML_ASSERT(n_embd_gqa == hparams.n_key_gqa());
 
                 const int i_gpu_start = n_layer - n_gpu_layers;
 
@@ -5946,6 +5949,11 @@ struct llm_build_context {
     struct ggml_cgraph * build_gpt2() {
         struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
 
+        const int64_t n_embd_head = hparams.n_value_dim;
+        const int64_t n_embd_gqa = hparams.n_value_gqa();
+        GGML_ASSERT(n_embd_head == hparams.n_key_dim);
+        GGML_ASSERT(n_embd_gqa == n_embd);
+
         struct ggml_tensor * cur;
         struct ggml_tensor * pos;
         struct ggml_tensor * inpL;