llama : initial Mamba-2 support

This commit is contained in:
Francis Couture-Harpin 2024-08-01 10:43:42 -04:00
parent a1631e53f6
commit 1f0fea70fb
7 changed files with 490 additions and 82 deletions

View file

@@ -396,7 +396,7 @@ class TensorNameMap:
"encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
-"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
+"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
),
MODEL_TENSOR.SSM_IN: (
@@ -429,6 +429,10 @@ class TensorNameMap:
"backbone.layers.{bid}.mixer.D",
),
MODEL_TENSOR.SSM_NORM: (
"backbone.layers.{bid}.mixer.norm", # mamba2
),
MODEL_TENSOR.SSM_OUT: (
"model.layers.{bid}.out_proj",
"backbone.layers.{bid}.mixer.out_proj",