From 324afba5ccac7250b251f4cff31c812e2e86a3fc Mon Sep 17 00:00:00 2001
From: Molly Sophia
Date: Fri, 10 Jan 2025 09:42:46 +0800
Subject: [PATCH] better sanity check skipping for QRWKV6 in llama-quant

thanks @compilade

Signed-off-by: Molly Sophia
---
 src/llama-quant.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index c2acd0f25..1376b13c1 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -621,8 +621,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
 
-    // sanity checks
-    if (!llama_model_is_recurrent(&model))
+    // sanity checks for models that have attention layers
+    if (qs.n_attention_wv != 0)
     {
         const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
         // attention layers have a non-zero number of kv heads