convert: dbrx: fix: remove the wrong ATTN_OUT_NORM tensor and add the output layer mapping
This commit is contained in:
parent
c8e6f903e0
commit
916b91852b
2 changed files with 9 additions and 8 deletions
|
@ -648,7 +648,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||
MODEL_TENSOR.ATTN_QKV,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_NORM_2,
|
||||
MODEL_TENSOR.ATTN_OUT_NORM,
|
||||
MODEL_TENSOR.FFN_GATE_INP,
|
||||
MODEL_TENSOR.FFN_GATE_EXP,
|
||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||
|
|
|
@ -52,6 +52,7 @@ class TensorNameMap:
|
|||
"output", # llama-pth bloom internlm2
|
||||
"word_embeddings_for_head", # persimmon
|
||||
"lm_head.linear", # phi2
|
||||
"transformer.wte.weight", # dbrx
|
||||
),
|
||||
|
||||
# Output norm
|
||||
|
@ -68,6 +69,7 @@ class TensorNameMap:
|
|||
"model.norm_f", # mamba-qbert
|
||||
"backbone.norm_f", # mamba
|
||||
"transformer.rms_norm", # Grok
|
||||
"transformer.norm_f.weight", # dbrx
|
||||
),
|
||||
|
||||
# Rope frequencies
|
||||
|
@ -176,10 +178,9 @@ class TensorNameMap:
|
|||
|
||||
# Attention output norm
|
||||
MODEL_TENSOR.ATTN_OUT_NORM: (
|
||||
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm1", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj.weight", # dbrx
|
||||
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm1", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok
|
||||
),
|
||||
|
||||
# Rotary embeddings
|
||||
|
@ -307,9 +308,10 @@ class TensorNameMap:
|
|||
),
|
||||
|
||||
MODEL_TENSOR.LAYER_OUT_NORM: (
|
||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj.weight", # dbrx
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_IN: (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue