Update tensor_mapping.py

DisOOM authored on 2024-04-03 14:47:40 +08:00, committed by GitHub
parent b3cf383f24
commit 3b22eb7da5


@@ -204,7 +204,8 @@ class TensorNameMap:
         MODEL_TENSOR.FFN_GATE_INP: (
             "layers.{bid}.feed_forward.gate",           # mixtral
             "model.layers.{bid}.block_sparse_moe.gate", # mixtral
-            "transformer.decoder_layer.{bid}.router"    # Grok
+            "transformer.decoder_layer.{bid}.router",   # Grok
+            "model.layers.{bid}.moe.gate",              # qwen2moe-mergekit
         ),
 
         # Feed-forward up
@@ -234,6 +235,7 @@ class TensorNameMap:
             "layers.{bid}.feed_forward.experts.{xid}.w3",           # mixtral
             "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3", # mixtral
             "transformer.decoder_layer.{bid}.moe.{xid}.linear_v",   # Grok
+            "model.layers.{bid}.moe.mlp.{xid}.up_proj",             # qwen2moe-mergekit
         ),
 
         # AWQ-activation gate
@@ -254,7 +256,8 @@ class TensorNameMap:
         MODEL_TENSOR.FFN_GATE_EXP: (
             "layers.{bid}.feed_forward.experts.{xid}.w1",           # mixtral
             "model.layers.{bid}.block_sparse_moe.experts.{xid}.w1", # mixtral
-            "transformer.decoder_layer.{bid}.moe.{xid}.linear"      # Grok
+            "transformer.decoder_layer.{bid}.moe.{xid}.linear",     # Grok
+            "model.layers.{bid}.moe.mlp.{xid}.gate_proj",           # qwen2moe-mergekit
         ),
 
         # Feed-forward down
@@ -283,7 +286,7 @@
             "layers.{bid}.feed_forward.experts.{xid}.w2",           # mixtral
             "model.layers.{bid}.block_sparse_moe.experts.{xid}.w2", # mixtral
             "transformer.decoder_layer.{bid}.moe.{xid}.linear_1",   # Grok
+            "model.layers.{bid}.moe.mlp.{xid}.down_proj",           # qwen2moe-mergekit
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
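
The strings added above are name templates: `{bid}` stands for the block (layer) index and `{xid}` for the expert index, and each template is expanded into concrete checkpoint tensor names so that a qwen2moe-mergekit tensor such as `model.layers.3.moe.mlp.7.up_proj` can be matched. The sketch below illustrates that expansion only; `build_lookup` and the block/expert counts are hypothetical and not part of the gguf-py API.

# Minimal sketch (not the actual gguf-py implementation) of how the
# "{bid}"/"{xid}" templates can be expanded into a lookup table.
FFN_UP_EXP_TEMPLATES = (
    "layers.{bid}.feed_forward.experts.{xid}.w3",            # mixtral
    "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3",  # mixtral
    "transformer.decoder_layer.{bid}.moe.{xid}.linear_v",    # Grok
    "model.layers.{bid}.moe.mlp.{xid}.up_proj",              # qwen2moe-mergekit
)

def build_lookup(templates: tuple[str, ...], n_blocks: int, n_experts: int) -> dict[str, tuple[int, int]]:
    """Expand every template for each (block, expert) pair into a concrete tensor name."""
    lookup: dict[str, tuple[int, int]] = {}
    for bid in range(n_blocks):
        for xid in range(n_experts):
            for tmpl in templates:
                lookup[tmpl.format(bid=bid, xid=xid)] = (bid, xid)
    return lookup

# Usage: resolve a qwen2moe-mergekit checkpoint name back to (block, expert).
# The counts 24/60 are placeholder values, not taken from any specific model.
lookup = build_lookup(FFN_UP_EXP_TEMPLATES, n_blocks=24, n_experts=60)
print(lookup["model.layers.3.moe.mlp.7.up_proj"])  # -> (3, 7)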