diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 0c7f945ba..3ef2f69e7 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -2363,12 +2363,10 @@ class Gemma2Model(Model):
         self.gguf_writer.add_key_length(hparams["head_dim"])
         self.gguf_writer.add_value_length(hparams["head_dim"])
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_float32(
-            gguf.Keys.LLM.ATTN_LOGIT_SOFTCAPPING.format(arch=self.model_arch),
+        self.gguf_writer.add_attn_logit_softcapping(
             self.hparams["attn_logit_softcapping"]
         )
-        self.gguf_writer.add_float32(
-            gguf.Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.model_arch),
+        self.gguf_writer.add_final_logit_softcapping(
             self.hparams["final_logit_softcapping"]
         )
 
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 9869f6fe3..1aeb0d9b0 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -516,6 +516,12 @@ class GGUFWriter:
     def add_logit_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value)
 
+    def add_attn_logit_softcapping(self, value: float) -> None:
+        self.add_float32(Keys.LLM.ATTN_LOGIT_SOFTCAPPING.format(arch=self.arch), value)
+
+    def add_final_logit_softcapping(self, value: float) -> None:
+        self.add_float32(Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.arch), value)
+
     def add_expert_count(self, count: int) -> None:
         self.add_uint32(Keys.LLM.EXPERT_COUNT.format(arch=self.arch), count)