Update tensor mappings for Phi models (Phi-1, Phi-1.5, Phi-2)
Signed-off-by: teleprint-me <77757836+teleprint-me@users.noreply.github.com>
This commit is contained in:
parent
e53d44c0bb
commit
e96f40bf99
1 changed files with 8 additions and 8 deletions
|
@ -17,7 +17,7 @@ class TensorNameMap:
|
||||||
"tok_embeddings", # llama-pth
|
"tok_embeddings", # llama-pth
|
||||||
"embeddings.word_embeddings", # bert
|
"embeddings.word_embeddings", # bert
|
||||||
"language_model.embedding.word_embeddings", # persimmon
|
"language_model.embedding.word_embeddings", # persimmon
|
||||||
"transformer.embd.wte", # phi2
|
"transformer.embd.wte", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Token type embeddings
|
# Token type embeddings
|
||||||
|
@ -42,7 +42,7 @@ class TensorNameMap:
|
||||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen
|
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen
|
||||||
"output", # llama-pth bloom
|
"output", # llama-pth bloom
|
||||||
"word_embeddings_for_head", # persimmon
|
"word_embeddings_for_head", # persimmon
|
||||||
"lm_head.linear", # phi2
|
"lm_head.linear", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Output norm
|
# Output norm
|
||||||
|
@ -55,7 +55,7 @@ class TensorNameMap:
|
||||||
"transformer.norm_f", # mpt
|
"transformer.norm_f", # mpt
|
||||||
"ln_f", # refact bloom qwen
|
"ln_f", # refact bloom qwen
|
||||||
"language_model.encoder.final_layernorm", # persimmon
|
"language_model.encoder.final_layernorm", # persimmon
|
||||||
"lm_head.ln", # phi2
|
"lm_head.ln", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rope frequencies
|
# Rope frequencies
|
||||||
|
@ -78,7 +78,7 @@ class TensorNameMap:
|
||||||
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||||
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
|
||||||
"model.layers.{bid}.ln1", # yi
|
"model.layers.{bid}.ln1", # yi
|
||||||
"transformer.h.{bid}.ln", # phi2
|
"transformer.h.{bid}.ln", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention norm 2
|
# Attention norm 2
|
||||||
|
@ -94,7 +94,7 @@ class TensorNameMap:
|
||||||
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
||||||
"h.{bid}.self_attention.query_key_value", # bloom
|
"h.{bid}.self_attention.query_key_value", # bloom
|
||||||
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
|
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
|
||||||
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
"transformer.h.{bid}.mixer.Wqkv", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention query
|
# Attention query
|
||||||
|
@ -133,7 +133,7 @@ class TensorNameMap:
|
||||||
"encoder.layer.{bid}.attention.output.dense", # bert
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
||||||
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||||
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
|
||||||
"transformer.h.{bid}.mixer.out_proj", # phi2
|
"transformer.h.{bid}.mixer.out_proj", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rotary embeddings
|
# Rotary embeddings
|
||||||
|
@ -173,7 +173,7 @@ class TensorNameMap:
|
||||||
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||||
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
|
||||||
"transformer.h.{bid}.mlp.w1", # qwen
|
"transformer.h.{bid}.mlp.w1", # qwen
|
||||||
"transformer.h.{bid}.mlp.fc1", # phi2
|
"transformer.h.{bid}.mlp.fc1", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_UP_EXP: (
|
MODEL_TENSOR.FFN_UP_EXP: (
|
||||||
|
@ -205,7 +205,7 @@ class TensorNameMap:
|
||||||
"encoder.layer.{bid}.output.dense", # bert
|
"encoder.layer.{bid}.output.dense", # bert
|
||||||
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||||
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
|
||||||
"transformer.h.{bid}.mlp.fc2", # phi2
|
"transformer.h.{bid}.mlp.fc2", # phi1 phi1_5 phi2
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue