Add custom kq scaling from Gemma2Attention

This commit is contained in:
Andrei Betlen 2024-06-29 10:17:33 -04:00
parent 6f2464e3dd
commit a89427908d
4 changed files with 12 additions and 1 deletions

View file

@ -522,6 +522,9 @@ class GGUFWriter:
def add_final_logit_softcapping(self, value: float) -> None:
    """Record the architecture-scoped final-logit softcapping value as a float32 metadata field."""
    key = Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float32(key, value)
def add_query_pre_attn_scalar(self, value: float) -> None:
    """Record the architecture-scoped query pre-attention scalar as a float32 metadata field."""
    key = Keys.LLM.QUERY_PRE_ATTN_SCALAR.format(arch=self.arch)
    self.add_float32(key, value)
def add_expert_count(self, count: int) -> None:
    """Record the architecture-scoped expert count as a uint32 metadata field."""
    key = Keys.LLM.EXPERT_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)