Remove custom pre attention scaling and use computed value instead.

Andrei Betlen 2024-06-29 23:02:50 -04:00
parent a89427908d
commit 51f0bd50a1
4 changed files with 2 additions and 13 deletions

@@ -522,9 +522,6 @@ class GGUFWriter:
     def add_final_logit_softcapping(self, value: float) -> None:
         self.add_float32(Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.arch), value)
 
-    def add_query_pre_attn_scalar(self, value: float) -> None:
-        self.add_float32(Keys.LLM.QUERY_PRE_ATTN_SCALAR.format(arch=self.arch), value)
-
     def add_expert_count(self, count: int) -> None:
         self.add_uint32(Keys.LLM.EXPERT_COUNT.format(arch=self.arch), count)
 
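For context, the removed writer method emitted a model-specific query pre-attention scalar into the GGUF metadata; per the commit title, the attention scale is now derived from the model's own hyperparameters instead of being stored as a custom key. The snippet below is a minimal, hypothetical sketch of such a derivation, assuming the conventional 1/sqrt(head_dim) scaled-dot-product factor. The function name and example values are illustrative only and are not taken from this commit or from the surrounding codebase.

# Hypothetical sketch (not part of this commit): derive the attention scale
# from existing hyperparameters rather than reading a stored scalar.
def computed_attn_scale(n_embd: int, n_head: int) -> float:
    # Assume the conventional 1/sqrt(head_dim) scaling, with head_dim = n_embd // n_head.
    head_dim = n_embd // n_head
    return 1.0 / (head_dim ** 0.5)

# Example: a hypothetical model with 4096-dim embeddings and 32 heads
# gives head_dim = 128 and a scale of 1/sqrt(128) (about 0.088).
print(computed_attn_scale(4096, 32))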