convert: dbrx: fix mixed up and down expert tensors
This commit is contained in:
parent
a7f9a3eafc
commit
e3c1e8127c
1 changed files with 2 additions and 2 deletions
|
@@ -238,7 +238,7 @@ class TensorNameMap:
|
|||
MODEL_TENSOR.FFN_UP_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
|
||||
- "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||
+ "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||
),
|
||||
|
||||
# AWQ-activation gate
|
||||
|
@@ -287,7 +287,7 @@ class TensorNameMap:
|
|||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||
"layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
|
||||
"transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
|
||||
- "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||
+ "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue