llama: rwkv6: Keep `time_mix_w1/w2` as F32

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
Author: Molly Sophia
Date: 2024-08-26 09:32:16 +08:00
Parent: 601b5920c6
Commit: e0ea51144e
2 changed files with 4 additions and 0 deletions

@@ -300,6 +300,8 @@ class Model:
                 gguf.MODEL_TENSOR.TOKEN_TYPES,
                 gguf.MODEL_TENSOR.SSM_CONV1D,
                 gguf.MODEL_TENSOR.TIME_MIX_FIRST,
+                gguf.MODEL_TENSOR.TIME_MIX_W1,
+                gguf.MODEL_TENSOR.TIME_MIX_W2,
             )
         )
         or not new_name.endswith(".weight")
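For context: this converter-side check forces any tensor whose name matches one of the listed gguf.MODEL_TENSOR keys to be written as F32 regardless of the requested output type, and the change adds TIME_MIX_W1/TIME_MIX_W2 to that list. Below is a minimal, self-contained Python sketch of the same pattern; the suffix strings and helper name are hypothetical stand-ins, not the converter's actual matching code.

# Minimal sketch (hypothetical names): mimic the converter-side rule that some
# tensor kinds are always stored as F32, whatever the requested file type.
# The real converter matches gguf.MODEL_TENSOR keys; plain name suffixes
# stand in for that matching here.
F32_ONLY_SUFFIXES = (
    "time_mix_first.weight",
    "time_mix_w1.weight",
    "time_mix_w2.weight",
)

def choose_qtype(tensor_name: str, requested: str) -> str:
    # Tensors on the F32-only list keep full precision; everything else
    # gets the requested type (e.g. "F16" or "Q8_0").
    if tensor_name.endswith(F32_ONLY_SUFFIXES):
        return "F32"
    return requested

print(choose_qtype("blk.0.time_mix_w1.weight", "F16"))  # -> F32
print(choose_qtype("blk.0.ffn_down.weight", "F16"))     # -> F16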

@@ -17472,6 +17472,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // do not quantize RWKV's time_mix_first tensors
         quantize &= name.find("time_mix_first.weight") == std::string::npos;
+        quantize &= name.find("time_mix_w1.weight") == std::string::npos;
+        quantize &= name.find("time_mix_w2.weight") == std::string::npos;
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;
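On the quantization side, the guard is a chain of quantize &= name.find(...) == std::string::npos checks, i.e. a substring blacklist over tensor names: any tensor whose name contains one of the listed substrings is copied through at its original precision instead of being quantized. The following is a rough Python restatement of that filter under that reading; the helper name is hypothetical and this is not code from llama.cpp.

# Rough restatement (hypothetical helper): the C++ hunk above keeps a tensor
# unquantized whenever its name contains one of these substrings.
NEVER_QUANTIZE = (
    "time_mix_first.weight",
    "time_mix_w1.weight",
    "time_mix_w2.weight",
    "attn_rel_b.weight",
)

def should_quantize(name: str) -> bool:
    # Equivalent to chaining `quantize &= name.find(sub) == std::string::npos`
    # for each blacklisted substring.
    return not any(sub in name for sub in NEVER_QUANTIZE)

print(should_quantize("blk.3.time_mix_w2.weight"))  # -> False (kept as-is)
print(should_quantize("blk.3.attn_q.weight"))       # -> True  (quantized)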