This commit is contained in:
Ashish 2024-04-14 14:30:33 -07:00
parent 13c75c21eb
commit 412a2807cb

View file

@@ -173,6 +173,7 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
),
# Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
@@ -260,6 +261,7 @@ class TensorNameMap:
"transformer.decoder_layer.{bid}.moe.linear", # Grok (merged)
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
),
# Feed-forward down
MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox