Add custom kq scaling from Gemma2Attention

This commit is contained in:
Andrei Betlen 2024-06-29 10:17:33 -04:00
parent 6f2464e3dd
commit a89427908d
4 changed files with 12 additions and 1 deletion

View file

@@ -52,6 +52,7 @@ class Keys:
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
QUERY_PRE_ATTN_SCALAR = "{arch}.query_pre_attn_scalar"
class Attention:
HEAD_COUNT = "{arch}.attention.head_count"