convert: dbrx: fix swapped up/down expert tensors

This commit is contained in:
Pierrick HYMBERT 2024-04-06 19:21:43 +02:00
parent a7f9a3eafc
commit e3c1e8127c

View file

@@ -238,7 +238,7 @@ class TensorNameMap:
MODEL_TENSOR.FFN_UP_EXP: (
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
+ "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
),
# AWQ-activation gate
@@ -287,7 +287,7 @@ class TensorNameMap:
MODEL_TENSOR.FFN_DOWN_EXP: (
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
- "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
+ "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
),
MODEL_TENSOR.ATTN_Q_NORM: (