diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7374cb25e..27ac34b81 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -300,6 +300,8 @@ class Model: gguf.MODEL_TENSOR.TOKEN_TYPES, gguf.MODEL_TENSOR.SSM_CONV1D, gguf.MODEL_TENSOR.TIME_MIX_FIRST, + gguf.MODEL_TENSOR.TIME_MIX_W1, + gguf.MODEL_TENSOR.TIME_MIX_W2, ) ) or not new_name.endswith(".weight") diff --git a/src/llama.cpp b/src/llama.cpp index 62e8c0c34..e437d265e 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -17472,6 +17472,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // do not quantize RWKV's time_mix_first tensors quantize &= name.find("time_mix_first.weight") == std::string::npos; + quantize &= name.find("time_mix_w1.weight") == std::string::npos; + quantize &= name.find("time_mix_w2.weight") == std::string::npos; // do not quantize relative position bias (T5) quantize &= name.find("attn_rel_b.weight") == std::string::npos;