From e0ea51144ef9c5be2eafee5275ce37f8b414615b Mon Sep 17 00:00:00 2001
From: Molly Sophia <mollysophia379@gmail.com>
Date: Mon, 26 Aug 2024 09:32:16 +0800
Subject: [PATCH] llama: rwkv6: Keep ``time_mix_w1/w2`` as F32

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
---
 convert_hf_to_gguf.py | 2 ++
 src/llama.cpp         | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 7374cb25e..27ac34b81 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -300,6 +300,8 @@ class Model:
                             gguf.MODEL_TENSOR.TOKEN_TYPES,
                             gguf.MODEL_TENSOR.SSM_CONV1D,
                             gguf.MODEL_TENSOR.TIME_MIX_FIRST,
+                            gguf.MODEL_TENSOR.TIME_MIX_W1,
+                            gguf.MODEL_TENSOR.TIME_MIX_W2,
                         )
                     )
                     or not new_name.endswith(".weight")
diff --git a/src/llama.cpp b/src/llama.cpp
index 62e8c0c34..e437d265e 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17472,6 +17472,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         // do not quantize RWKV's time_mix_first tensors
         quantize &= name.find("time_mix_first.weight") == std::string::npos;
+        quantize &= name.find("time_mix_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_w2.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;