Merge branch 'master' into pr/7359
This commit is contained in:
commit
22a648f8cc
88 changed files with 1892 additions and 1441 deletions
|
@ -10,7 +10,7 @@ class TensorNameMap:
|
|||
# Token embeddings
|
||||
MODEL_TENSOR.TOKEN_EMBD: (
|
||||
"gpt_neox.embed_in", # gptneox
|
||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
|
||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
|
||||
"transformer.word_embeddings", # falcon
|
||||
"word_embeddings", # bloom
|
||||
"model.embed_tokens", # llama-hf
|
||||
|
@ -51,7 +51,7 @@ class TensorNameMap:
|
|||
# Output
|
||||
MODEL_TENSOR.OUTPUT: (
|
||||
"embed_out", # gptneox
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
|
||||
"output", # llama-pth bloom internlm2
|
||||
"word_embeddings_for_head", # persimmon
|
||||
"lm_head.linear", # phi2
|
||||
|
@ -60,7 +60,7 @@ class TensorNameMap:
|
|||
# Output norm
|
||||
MODEL_TENSOR.OUTPUT_NORM: (
|
||||
"gpt_neox.final_layer_norm", # gptneox
|
||||
"transformer.ln_f", # gpt2 gpt-j falcon
|
||||
"transformer.ln_f", # gpt2 gpt-j falcon jais
|
||||
"model.norm", # llama-hf baichuan internlm2
|
||||
"norm", # llama-pth
|
||||
"transformer.norm_f", # mpt dbrx
|
||||
|
@ -84,7 +84,7 @@ class TensorNameMap:
|
|||
# Attention norm
|
||||
MODEL_TENSOR.ATTN_NORM: (
|
||||
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
|
||||
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
|
||||
"transformer.blocks.{bid}.norm_1", # mpt
|
||||
"transformer.h.{bid}.input_layernorm", # falcon7b
|
||||
"h.{bid}.input_layernorm", # bloom
|
||||
|
@ -113,7 +113,7 @@ class TensorNameMap:
|
|||
# Attention query-key-value
|
||||
MODEL_TENSOR.ATTN_QKV: (
|
||||
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
||||
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen
|
||||
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
|
||||
"transformer.blocks.{bid}.attn.Wqkv", # mpt
|
||||
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
||||
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
||||
|
@ -165,7 +165,7 @@ class TensorNameMap:
|
|||
# Attention output
|
||||
MODEL_TENSOR.ATTN_OUT: (
|
||||
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
||||
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
|
||||
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
||||
"transformer.h.{bid}.self_attention.dense", # falcon
|
||||
"h.{bid}.self_attention.dense", # bloom
|
||||
|
@ -208,7 +208,7 @@ class TensorNameMap:
|
|||
# Feed-forward norm
|
||||
MODEL_TENSOR.FFN_NORM: (
|
||||
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
||||
"transformer.h.{bid}.ln_2", # gpt2 refact qwen
|
||||
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
|
||||
"h.{bid}.post_attention_layernorm", # bloom
|
||||
"transformer.blocks.{bid}.norm_2", # mpt
|
||||
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
||||
|
@ -246,7 +246,7 @@ class TensorNameMap:
|
|||
# Feed-forward up
|
||||
MODEL_TENSOR.FFN_UP: (
|
||||
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_fc", # gpt2
|
||||
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais
|
||||
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
||||
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
||||
|
@ -292,6 +292,7 @@ class TensorNameMap:
|
|||
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
||||
"layers.{bid}.feed_forward.w1", # llama-pth
|
||||
"transformer.h.{bid}.mlp.w2", # qwen
|
||||
"transformer.h.{bid}.mlp.c_fc2", # jais
|
||||
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w1", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
||||
|
@ -315,7 +316,7 @@ class TensorNameMap:
|
|||
# Feed-forward down
|
||||
MODEL_TENSOR.FFN_DOWN: (
|
||||
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
||||
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
|
||||
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
|
||||
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
||||
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
||||
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue