From 26c253eda29bac3d76f36fd37a6861e32961013a Mon Sep 17 00:00:00 2001
From: Jan Ploski
Date: Sun, 1 Oct 2023 01:43:39 +0200
Subject: [PATCH] mpt : standardized all tensor names to follow GGUF spec

---
 convert-mpt-hf-to-gguf.py |  2 +-
 gguf-py/gguf/gguf.py      | 16 ++++++++--------
 llama.cpp                 | 21 ++++++++++-----------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/convert-mpt-hf-to-gguf.py b/convert-mpt-hf-to-gguf.py
index 057cb34f6..a0cee0cf9 100755
--- a/convert-mpt-hf-to-gguf.py
+++ b/convert-mpt-hf-to-gguf.py
@@ -247,7 +247,7 @@ for part_name in part_names:
 
     # note: MPT output is tied to (same as) wte in original model;
     # for easier implementation in llama.cpp it's duplicated in GGUF, though :/
-    if new_name == "wte.weight":
+    if new_name == "token_embd.weight":
         gguf_writer.add_tensor("output.weight", data)
 
 print("gguf: write header")
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 9c49d0ada..afd16e212 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -186,17 +186,17 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
         MODEL_TENSOR.FFN_UP:      "blk.{bid}.ffn_up",
     },
     MODEL_ARCH.MPT: {
-        MODEL_TENSOR.TOKEN_EMBD:  "wte",
-        MODEL_TENSOR.OUTPUT_NORM: "norm_f",
+        MODEL_TENSOR.TOKEN_EMBD:  "token_embd",
+        MODEL_TENSOR.OUTPUT_NORM: "output_norm",
         # note: MPT output is tied to (same as) wte in original model;
         # for easier implementation in llama.cpp it's duplicated in GGUF, though :/
         MODEL_TENSOR.OUTPUT:      "output",
-        MODEL_TENSOR.ATTN_NORM:   "blk.{bid}.norm_1",
-        MODEL_TENSOR.FFN_NORM:    "blk.{bid}.norm_2",
-        MODEL_TENSOR.ATTN_QKV:    "blk.{bid}.attn.Wqkv",
-        MODEL_TENSOR.ATTN_OUT:    "blk.{bid}.attn.out_proj",
-        MODEL_TENSOR.FFN_DOWN:    "blk.{bid}.ffn.down_proj",
-        MODEL_TENSOR.FFN_UP:      "blk.{bid}.ffn.up_proj",
+        MODEL_TENSOR.ATTN_NORM:   "blk.{bid}.attn_norm",
+        MODEL_TENSOR.FFN_NORM:    "blk.{bid}.ffn_norm",
+        MODEL_TENSOR.ATTN_QKV:    "blk.{bid}.attn_qkv",
+        MODEL_TENSOR.ATTN_OUT:    "blk.{bid}.attn_output",
+        MODEL_TENSOR.FFN_DOWN:    "blk.{bid}.ffn_down",
+        MODEL_TENSOR.FFN_UP:      "blk.{bid}.ffn_up",
     },
     MODEL_ARCH.GPT2: {
         # TODO
diff --git a/llama.cpp b/llama.cpp
index 2bec27b8b..7ea6dbe72 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -377,15 +377,15 @@ static std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES =
     {
         LLM_ARCH_MPT,
         {
-            { LLM_TENSOR_TOKEN_EMBD,      "wte" },
-            { LLM_TENSOR_OUTPUT_NORM,     "norm_f" },
+            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
             { LLM_TENSOR_OUTPUT,          "output" },
-            { LLM_TENSOR_ATTN_NORM,       "blk.%d.norm_1" },
-            { LLM_TENSOR_FFN_NORM,        "blk.%d.norm_2" },
-            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn.Wqkv" },
-            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn.out_proj" },
-            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn.down_proj" },
-            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn.up_proj" },
+            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
+            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
+            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
         },
     },
     {
@@ -6516,11 +6516,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
         // TODO: avoid hardcoded tensor names - use the TN_* constants
        if (name.find("attn_v.weight") != std::string::npos ||
-            name.find("attn.Wqkv.weight") != std::string::npos) {
+            name.find("attn_qkv.weight") != std::string::npos) {
             ++n_attention_wv;
         }
-        else if (name.find("ffn_down.weight") != std::string::npos ||
-                 name.find("ffn.down_proj.weight") != std::string::npos) {
+        else if (name.find("ffn_down.weight") != std::string::npos) {
             ++n_feed_forward_w2;
         }
     }
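
Note (not part of the diff above): the "duplicated in GGUF" comment refers to MPT's
tied LM head. In the original checkpoint the output projection shares its weights
with the token embedding (wte), and rather than teaching llama.cpp about weight
tying, the converter simply writes the embedding tensor a second time under the
standard GGUF name "output.weight". A minimal sketch of that flow, assuming
gguf-py's GGUFWriter API; the `tensors` dict and file name are illustrative
stand-ins for what the real converter reads from the HF checkpoint:

    import numpy as np
    import gguf  # gguf-py

    # Hypothetical tensors, already renamed to the spec names used in this
    # patch (real data comes from the MPT checkpoint's state dict).
    tensors = {
        "token_embd.weight":  np.zeros((8, 4), dtype=np.float32),
        "output_norm.weight": np.ones(4, dtype=np.float32),
    }

    writer = gguf.GGUFWriter("mpt.gguf", "mpt")
    for new_name, data in tensors.items():
        writer.add_tensor(new_name, data)
        # MPT ties lm_head to wte, so GGUF carries an explicit copy under the
        # standard output name; llama.cpp then loads it like any other model.
        if new_name == "token_embd.weight":
            writer.add_tensor("output.weight", data)

    print("gguf: write header")
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()

The payoff of the rename is visible in the llama.cpp hunk: once MPT emits the
spec names, the quantization heuristics match a single pattern per tensor kind
instead of carrying MPT-specific aliases like "attn.Wqkv.weight".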