llama : initial Mamba-2 support
This commit is contained in:
parent
a1631e53f6
commit
1f0fea70fb
7 changed files with 490 additions and 82 deletions
|
@ -396,7 +396,7 @@ class TensorNameMap:
|
|||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
||||
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
|
||||
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_IN: (
|
||||
|
@ -429,6 +429,10 @@ class TensorNameMap:
|
|||
"backbone.layers.{bid}.mixer.D",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_NORM: (
|
||||
"backbone.layers.{bid}.mixer.norm", # mamba2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_OUT: (
|
||||
"model.layers.{bid}.out_proj",
|
||||
"backbone.layers.{bid}.mixer.out_proj",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue