mpt : standardized all tensor names to follow GGUF spec
This commit is contained in:
parent
1be89c4002
commit
26c253eda2
3 changed files with 19 additions and 20 deletions
|
@@ -247,7 +247,7 @@ for part_name in part_names:
|
||||||
|
|
||||||
# note: MPT output is tied to (same as) wte in original model;
|
# note: MPT output is tied to (same as) wte in original model;
|
||||||
# for easier implementation in llama.cpp it's duplicated in GGUF, though :/
|
# for easier implementation in llama.cpp it's duplicated in GGUF, though :/
|
||||||
if new_name == "wte.weight":
|
if new_name == "token_embd.weight":
|
||||||
gguf_writer.add_tensor("output.weight", data)
|
gguf_writer.add_tensor("output.weight", data)
|
||||||
|
|
||||||
print("gguf: write header")
|
print("gguf: write header")
|
||||||
|
|
|
@@ -186,17 +186,17 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
||||||
},
|
},
|
||||||
MODEL_ARCH.MPT: {
|
MODEL_ARCH.MPT: {
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "wte",
|
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "norm_f",
|
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
||||||
# note: MPT output is tied to (same as) wte in original model;
|
# note: MPT output is tied to (same as) wte in original model;
|
||||||
# for easier implementation in llama.cpp it's duplicated in GGUF, though :/
|
# for easier implementation in llama.cpp it's duplicated in GGUF, though :/
|
||||||
MODEL_TENSOR.OUTPUT: "output",
|
MODEL_TENSOR.OUTPUT: "output",
|
||||||
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.norm_1",
|
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
||||||
MODEL_TENSOR.FFN_NORM: "blk.{bid}.norm_2",
|
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
||||||
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn.Wqkv",
|
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
||||||
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn.out_proj",
|
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn.down_proj",
|
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn.up_proj",
|
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
||||||
},
|
},
|
||||||
MODEL_ARCH.GPT2: {
|
MODEL_ARCH.GPT2: {
|
||||||
# TODO
|
# TODO
|
||||||
|
|
21
llama.cpp
21
llama.cpp
|
@@ -377,15 +377,15 @@ static std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES =
|
||||||
{
|
{
|
||||||
LLM_ARCH_MPT,
|
LLM_ARCH_MPT,
|
||||||
{
|
{
|
||||||
{ LLM_TENSOR_TOKEN_EMBD, "wte" },
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||||
{ LLM_TENSOR_OUTPUT_NORM, "norm_f" },
|
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||||
{ LLM_TENSOR_OUTPUT, "output" },
|
{ LLM_TENSOR_OUTPUT, "output" },
|
||||||
{ LLM_TENSOR_ATTN_NORM, "blk.%d.norm_1" },
|
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
|
||||||
{ LLM_TENSOR_FFN_NORM, "blk.%d.norm_2" },
|
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
|
||||||
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn.Wqkv" },
|
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
|
||||||
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn.out_proj" },
|
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
|
||||||
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn.down_proj" },
|
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
|
||||||
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn.up_proj" },
|
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@@ -6516,11 +6516,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
|
|
||||||
// TODO: avoid hardcoded tensor names - use the TN_* constants
|
// TODO: avoid hardcoded tensor names - use the TN_* constants
|
||||||
if (name.find("attn_v.weight") != std::string::npos ||
|
if (name.find("attn_v.weight") != std::string::npos ||
|
||||||
name.find("attn.Wqkv.weight") != std::string::npos) {
|
name.find("attn_qkv.weight") != std::string::npos) {
|
||||||
++n_attention_wv;
|
++n_attention_wv;
|
||||||
}
|
}
|
||||||
else if (name.find("ffn_down.weight") != std::string::npos ||
|
else if (name.find("ffn_down.weight") != std::string::npos) {
|
||||||
name.find("ffn.down_proj.weight") != std::string::npos) {
|
|
||||||
++n_feed_forward_w2;
|
++n_feed_forward_w2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue