convert : fix RWKV v6 model conversion (#10913)

* Enable --no-context-shift for llama-perplexity example Signed-off-by: Molly Sophia <mollysophia379@gmail.com> * RWKV 6: Fix error in ggml_cuda_op_bin_bcast Signed-off-by: Molly Sophia <mollysophia379@gmail.com> --------- Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
2024-12-20 17:44:58 +08:00 · 2024-12-20 17:44:58 +08:00 · 0a11f8b7b5
commit 0a11f8b7b5
parent d408bb9268
2 changed files with 4 additions and 1 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@ -3065,6 +3065,9 @@ class Rwkv6Model(Model):
        if new_name.endswith("time_mix_w2.weight"):
            data_torch = data_torch.permute(0, 2, 1)

+        if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
+            data_torch = data_torch.squeeze()
+
        rescale_every_n_layers = self.hparams["rescale_every"]
        if rescale_every_n_layers > 0:
            if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):