Add custom kq scaling from Gemma2Attention

This commit is contained in:
Andrei Betlen 2024-06-29 10:17:33 -04:00
parent 6f2464e3dd
commit a89427908d
4 changed files with 12 additions and 1 deletion

View file

@@ -52,6 +52,7 @@ class Keys:
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
QUERY_PRE_ATTN_SCALAR = "{arch}.query_pre_attn_scalar"
class Attention:
HEAD_COUNT = "{arch}.attention.head_count"