From 824fa750d4b441c4266a0581efbc32194f1bfe42 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 17 Dec 2024 10:25:17 +0200
Subject: [PATCH] llama : update WavTokenizer to non-causal attn

---
 convert_hf_to_gguf.py | 2 ++
 src/llama.cpp         | 1 +
 2 files changed, 3 insertions(+)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 7bf67a268..4a0b00f69 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2069,6 +2069,8 @@ class WavTokenizerDecModel(Model):
         self.gguf_writer.add_convnext_embedding_length(self.hparams["convnext"]["n_embd"])
         self.gguf_writer.add_convnext_block_count     (self.hparams["convnext"]["n_layer"])
 
+        self.gguf_writer.add_causal_attention(False)
+
 
 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):
diff --git a/src/llama.cpp b/src/llama.cpp
index 8f16d2d4f..94160d534 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6393,6 +6393,7 @@ static void llm_load_hparams(
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS,    hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_GROUPNORM_EPS,    hparams.f_norm_group_eps);
                 ml.get_key(LLM_KV_ATTENTION_GROUPNORM_GROUPS, hparams.n_norm_groups);
+                ml.get_key(LLM_KV_ATTENTION_CAUSAL,           hparams.causal_attn);
             } break;
         default: (void)0;
     }
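
For context, a minimal sketch of what the causal_attn flag written above
controls, assuming a plain boolean-mask representation of attention; the
names attn_mask and n are illustrative only and do not come from llama.cpp.
With causal_attn = true a token may attend only to itself and earlier
positions; WavTokenizer's decoder now declares causal_attn = false, i.e.
full bidirectional attention.

    # Sketch (assumed representation): the two mask shapes implied by causal_attn.
    def attn_mask(n: int, causal: bool) -> list[list[int]]:
        # mask[i][j] == 1 means position i may attend to position j
        return [[1 if (not causal or j <= i) else 0 for j in range(n)]
                for i in range(n)]

    n = 4  # hypothetical sequence length
    print(attn_mask(n, causal=True))   # lower-triangular: autoregressive decoding
    print(attn_mask(n, causal=False))  # all ones: non-causal, as this patch sets

The converter persists the flag via add_causal_attention(False), and the
loader reads it back through LLM_KV_ATTENTION_CAUSAL into hparams.causal_attn,
so the mask choice travels with the GGUF file rather than being hardcoded.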