Remove custom pre-attention scaling and use the computed value instead.

Andrei Betlen 2024-06-29 23:02:50 -04:00
parent a89427908d
commit 51f0bd50a1
4 changed files with 2 additions and 13 deletions


@@ -2369,9 +2369,6 @@ class Gemma2Model(Model):
         self.gguf_writer.add_final_logit_softcapping(
             self.hparams["final_logit_softcapping"]
         )
-        self.gguf_writer.add_query_pre_attn_scalar(
-            self.hparams["query_pre_attn_scalar"]
-        )
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
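
For context on the commit title, here is a minimal Python sketch of what deriving the query scaling at load time (rather than writing a dedicated query_pre_attn_scalar key during conversion) could look like. This is an assumption about the intent, not code from this commit; the hparams keys used (head_dim, hidden_size, num_attention_heads) are modeled on typical Hugging Face Gemma 2 configs.

import math

# Hypothetical sketch (not from this diff): compute the pre-attention query
# scale from hyperparameters the loader already has, instead of reading a
# separately stored query_pre_attn_scalar value.
def computed_query_scale(hparams: dict) -> float:
    # Fall back to hidden_size / num_attention_heads when head_dim is absent;
    # both key names are assumptions based on HF-style config files.
    head_dim = hparams.get(
        "head_dim",
        hparams["hidden_size"] // hparams["num_attention_heads"],
    )
    # Queries are conventionally scaled by 1 / sqrt(head_dim) before the
    # attention dot product.
    return 1.0 / math.sqrt(head_dim)

# Example usage with an illustrative value:
# computed_query_scale({"head_dim": 256})  # -> 1/16 = 0.0625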