diff --git a/src/llama.cpp b/src/llama.cpp index a65678fc2..a6f6ef124 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -17469,6 +17469,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // NOTE: can't use LLM_TN here because the layer number is not known quantize &= name.find("ssm_conv1d.weight") == std::string::npos; + // do not quantize RWKV's time_mix_first tensors + quantize &= name.find("time_mix_first.weight") == std::string::npos; + // do not quantize relative position bias (T5) quantize &= name.find("attn_rel_b.weight") == std::string::npos;