llama: Add support for Gemma2ForCausalLM (#8156)

* Inference support for Gemma 2 model family

* Update convert-hf-to-gguf.py, constants, and tensor mappings

* cleanup

* format fix

* Fix special token vocab bug

* Don't add space prefix

* fix deleted lines

* Update src/llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

* Add model type names

* Add control vector

* Fix model type identification

---------

Co-authored-by: Andrei Betlen <abetlen@gmail.com>
Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
pculliton 2024-06-28 00:00:43 -04:00 committed by GitHub
parent a27aa50ab7
commit e57dc62057
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 274 additions and 1 deletions

View file

@ -187,6 +187,10 @@ class TensorNameMap:
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
),
MODEL_TENSOR.ATTN_POST_NORM: (
"model.layers.{bid}.post_attention_layernorm", # gemma2
),
# Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@ -210,6 +214,16 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
),
# Pre feed-forward norm
MODEL_TENSOR.FFN_PRE_NORM: (
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
),
# Post feed-forward norm
MODEL_TENSOR.FFN_POST_NORM: (
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
),
MODEL_TENSOR.FFN_GATE_INP: (
"layers.{bid}.feed_forward.gate", # mixtral
"model.layers.{bid}.block_sparse_moe.gate", # mixtral