Add custom kq scaling from Gemma2Attention
This commit is contained in:
parent
6f2464e3dd
commit
a89427908d
4 changed files with 12 additions and 1 deletions
|
@ -52,6 +52,7 @@ class Keys:
|
|||
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
|
||||
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
|
||||
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
|
||||
QUERY_PRE_ATTN_SCALAR = "{arch}.query_pre_attn_scalar"
|
||||
|
||||
class Attention:
|
||||
HEAD_COUNT = "{arch}.attention.head_count"
|
||||
|
|
|
@ -522,6 +522,9 @@ class GGUFWriter:
|
|||
def add_final_logit_softcapping(self, value: float) -> None:
    """Record the final-logit soft-capping factor in the GGUF metadata.

    Writes *value* as a float32 under the architecture-specific key
    (``{arch}.final_logit_softcapping``), matching the pattern used by the
    sibling ``add_*`` writer methods.
    """
    # Resolve the per-architecture key before writing, for readability.
    key = Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float32(key, value)
|
||||
|
||||
def add_query_pre_attn_scalar(self, value: float) -> None:
    """Record the query pre-attention scaling factor (Gemma2-style KQ scale).

    Writes *value* as a float32 under the architecture-specific key
    (``{arch}.query_pre_attn_scalar``), matching the pattern used by the
    sibling ``add_*`` writer methods.
    """
    # Resolve the per-architecture key before writing, for readability.
    key = Keys.LLM.QUERY_PRE_ATTN_SCALAR.format(arch=self.arch)
    self.add_float32(key, value)
|
||||
|
||||
def add_expert_count(self, count: int) -> None:
    """Record the number of experts (MoE models) in the GGUF metadata.

    Writes *count* as a uint32 under the architecture-specific key
    (``{arch}.expert_count``), matching the pattern used by the sibling
    ``add_*`` writer methods.
    """
    # Resolve the per-architecture key before writing, for readability.
    key = Keys.LLM.EXPERT_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue