llama: Add support for Gemma2ForCausalLM (#8156)

* Inference support for Gemma 2 model family * Update convert-hf-to-gguf.py, constants, and tensor mappings * cleanup * format fix * Fix special token vocab bug * Don't add space prefix * fix deleted lines * Update src/llama.cpp Co-authored-by: slaren <slarengh@gmail.com> * Add model type names * Add control vector * Fix model type identification --------- Co-authored-by: Andrei Betlen <abetlen@gmail.com> Co-authored-by: slaren <slarengh@gmail.com>
2024-06-28 00:00:43 -04:00 · 2024-06-28 00:00:43 -04:00 · e57dc62057
commit e57dc62057
parent a27aa50ab7
4 changed files with 274 additions and 1 deletions
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@ -187,6 +187,10 @@ class TensorNameMap:
            "transformer.blocks.{bid}.norm_attn_norm.norm_2",  # dbrx
        ),

+        MODEL_TENSOR.ATTN_POST_NORM: (
+            "model.layers.{bid}.post_attention_layernorm",     # gemma2
+        ),
+
        # Rotary embeddings
        MODEL_TENSOR.ATTN_ROT_EMBD: (
            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",        # llama-hf
@ -210,6 +214,16 @@ class TensorNameMap:
            "transformer.decoder_layer.{bid}.rms_norm_2",                    # Grok
        ),

+        # Pre feed-forward norm
+        MODEL_TENSOR.FFN_PRE_NORM: (
+            "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+        ),
+
+        # Post feed-forward norm
+        MODEL_TENSOR.FFN_POST_NORM: (
+            "model.layers.{bid}.post_feedforward_layernorm", # gemma2
+        ),
+
        MODEL_TENSOR.FFN_GATE_INP: (
            "layers.{bid}.feed_forward.gate",             # mixtral
            "model.layers.{bid}.block_sparse_moe.gate",   # mixtral