diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index c2acd0f25..1376b13c1 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -621,8 +621,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
 
-    // sanity checks
-    if (!llama_model_is_recurrent(&model))
+    // sanity checks for models that have attention layers
+    if (qs.n_attention_wv != 0)
     {
         const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
         // attention layers have a non-zero number of kv heads
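
For context, a minimal standalone sketch of the check this hunk guards. Only part of the surrounding code is visible in the context lines, so everything outside the diff here (the sample layer counts, the array contents, and the use of a plain assert in place of llama.cpp's own assertion macro) is illustrative, not a copy of the real implementation. The idea: an attention layer is one with a non-zero KV-head count, so the number of attn_v tensors collected into qs.n_attention_wv during the tensor pass should equal the number of non-zero entries in n_head_kv_arr. Gating on qs.n_attention_wv != 0 instead of !llama_model_is_recurrent(&model) presumably keeps the check active for hybrid models (recurrent models that still contain attention layers) and skips it for models with no attention layers at all.

    // Hedged sketch: names mirror the diff, scaffolding is hypothetical.
    #include <algorithm>
    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
        // Hypothetical hybrid model: 6 layers, 2 of them recurrent (0 kv heads).
        const int32_t n_layer = 6;
        const std::array<uint32_t, 6> n_head_kv_arr = {8, 8, 0, 8, 0, 8};

        // What qs.n_attention_wv would hold after scanning the model's tensors:
        // one attn_v weight per attention layer.
        const int32_t n_attention_wv = 4;

        // sanity checks for models that have attention layers
        if (n_attention_wv != 0) {
            // attention layers have a non-zero number of kv heads
            const auto n_head_kv_iter = n_head_kv_arr.begin();
            const int32_t n_attn_layer = n_layer
                - (int32_t) std::count(n_head_kv_iter, n_head_kv_iter + n_layer, 0);
            assert(n_attention_wv == n_attn_layer && "n_attention_wv is unexpected");
        }
        return 0;
    }

Deriving the expected count from n_head_kv_arr rather than from a model-level flag lets attention-free and hybrid architectures fall out naturally: when no attention tensors were seen, the check simply becomes a no-op.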