From 1a9c2635b32814915b8e6a8af57cfd77606c1b23 Mon Sep 17 00:00:00 2001 From: Molly Sophia Date: Sat, 8 Feb 2025 12:41:43 +0800 Subject: [PATCH] rwkv7: do not quantize small yet 2D lora weights Signed-off-by: Molly Sophia --- src/llama-quant.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index fb7982655..09eb57077 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -756,10 +756,19 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: // NOTE: can't use LLM_TN here because the layer number is not known quantize &= name.find("ssm_conv1d.weight") == std::string::npos; - // do not quantize RWKV's time_mix_first tensors + // do not quantize RWKV's small yet 2D weights quantize &= name.find("time_mix_first.weight") == std::string::npos; + quantize &= name.find("time_mix_w0.weight") == std::string::npos; quantize &= name.find("time_mix_w1.weight") == std::string::npos; quantize &= name.find("time_mix_w2.weight") == std::string::npos; + quantize &= name.find("time_mix_v0.weight") == std::string::npos; + quantize &= name.find("time_mix_v1.weight") == std::string::npos; + quantize &= name.find("time_mix_v2.weight") == std::string::npos; + quantize &= name.find("time_mix_a0.weight") == std::string::npos; + quantize &= name.find("time_mix_a1.weight") == std::string::npos; + quantize &= name.find("time_mix_a2.weight") == std::string::npos; + quantize &= name.find("time_mix_g1.weight") == std::string::npos; + quantize &= name.find("time_mix_g2.weight") == std::string::npos; quantize &= name.find("time_mix_decay_w1.weight") == std::string::npos; quantize &= name.find("time_mix_decay_w2.weight") == std::string::npos; quantize &= name.find("time_mix_lerp_fused.weight") == std::string::npos;