From 6da6aa48b0dbcb3ec392d94538711aa2ba12aa4e Mon Sep 17 00:00:00 2001
From: Molly Sophia
Date: Tue, 13 Aug 2024 18:31:25 +0800
Subject: [PATCH] llama: rwkv6: Add quantization tensor exclusion

Signed-off-by: Molly Sophia
---
 src/llama.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/llama.cpp b/src/llama.cpp
index a65678fc2..a6f6ef124 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17469,6 +17469,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // NOTE: can't use LLM_TN here because the layer number is not known
         quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
 
+        // do not quantize RWKV's time_mix_first tensors
+        quantize &= name.find("time_mix_first.weight") == std::string::npos;
+
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;