From 324afba5ccac7250b251f4cff31c812e2e86a3fc Mon Sep 17 00:00:00 2001
From: Molly Sophia
Date: Fri, 10 Jan 2025 09:42:46 +0800
Subject: [PATCH] better sanity check skipping for QRWKV6 in llama-quant

thanks @compilade

Signed-off-by: Molly Sophia
---
 src/llama-quant.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index c2acd0f25..1376b13c1 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -621,8 +621,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)model.hparams.n_layer;
 
-    // sanity checks
-    if (!llama_model_is_recurrent(&model))
+    // sanity checks for models that have attention layers
+    if (qs.n_attention_wv != 0)
     {
         const auto & n_head_kv_iter = model.hparams.n_head_kv_arr.begin();
         // attention layers have a non-zero number of kv heads