llama: rwkv6: Add quantization tensor exclusion

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Author: Molly Sophia <mollysophia379@gmail.com>
Date:   2024-08-13 18:31:25 +08:00
commit 6da6aa48b0 (parent c165e34629)


@@ -17469,6 +17469,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // NOTE: can't use LLM_TN here because the layer number is not known
     quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
+    // do not quantize RWKV's time_mix_first tensors
+    quantize &= name.find("time_mix_first.weight") == std::string::npos;
     // do not quantize relative position bias (T5)
     quantize &= name.find("attn_rel_b.weight") == std::string::npos;