From eea19039fc52ea2dbd1aab45b59ab4e3e29a3443 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 12 Jan 2024 13:00:51 +0200
Subject: [PATCH] convert : fix persimmon conversion to write correct n_rot

---
 convert-hf-to-gguf.py          | 9 ++++++++-
 gguf-py/gguf/tensor_mapping.py | 7 +++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 203eaf64b..813aeeed6 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -817,10 +817,17 @@ class PersimmonModel(Model):
         hidden_size = self.hparams["hidden_size"]
 
         self.gguf_writer.add_name('persimmon-8b-chat')
+        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hidden_size)
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
+
+        # NOTE: not sure about this change - why does the model not have a rope dimension count when it is smaller
+        #       than the head size?
+        #       ref: https://github.com/ggerganov/llama.cpp/pull/4889
+        #self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
+        self.gguf_writer.add_rope_dimension_count(hidden_size // head_count // 2)
+
         self.gguf_writer.add_head_count(head_count)
         self.gguf_writer.add_head_count_kv(head_count_kv)
         self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 80c1d5449..24a089037 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -57,6 +57,7 @@ class TensorNameMap:
         "transformer.norm_f",                      # mpt
         "ln_f",                                    # refact bloom qwen gpt2
         "language_model.encoder.final_layernorm",  # persimmon
+        "model.final_layernorm",                   # persimmon
         "lm_head.ln",                              # phi2
     ),
 
@@ -98,6 +99,7 @@
         "transformer.h.{bid}.self_attention.query_key_value",                 # falcon
         "h.{bid}.self_attention.query_key_value",                             # bloom
         "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
+        "model.layers.{bid}.self_attn.query_key_value",                       # persimmon
         "h.{bid}.attn.c_attn",                                                # gpt2
         "transformer.h.{bid}.mixer.Wqkv",                                     # phi2
     ),
@@ -141,6 +143,7 @@
         "encoder.layer.{bid}.attention.output.dense",                # bert
         "transformer.h.{bid}.attn.out_proj",                         # gpt-j
         "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
+        "model.layers.{bid}.self_attn.dense",                        # persimmon
         "h.{bid}.attn.c_proj",                                       # gpt2
         "transformer.h.{bid}.mixer.out_proj",                        # phi2
         "model.layers.layers.{bid}.self_attn.o_proj",                # plamo
@@ -184,6 +187,7 @@
         "encoder.layer.{bid}.intermediate.dense",                 # bert
         "transformer.h.{bid}.mlp.fc_in",                          # gpt-j
         "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
+        "model.layers.{bid}.mlp.dense_h_to_4h",                   # persimmon
         "transformer.h.{bid}.mlp.w1",                             # qwen
         "h.{bid}.mlp.c_fc",                                       # gpt2
         "transformer.h.{bid}.mlp.fc1",                            # phi2
@@ -225,6 +229,7 @@
         "encoder.layer.{bid}.output.dense",                       # bert
         "transformer.h.{bid}.mlp.fc_out",                         # gpt-j
         "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
+        "model.layers.{bid}.mlp.dense_4h_to_h",                   # persimmon
         "h.{bid}.mlp.c_proj",                                     # gpt2
         "transformer.h.{bid}.mlp.fc2",                            # phi2
         "model.layers.layers.{bid}.mlp.down_proj",                # plamo
@@ -237,10 +242,12 @@
 
     MODEL_TENSOR.ATTN_Q_NORM: (
         "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
+        "model.layers.{bid}.self_attn.q_layernorm",                        # persimmon
     ),
 
     MODEL_TENSOR.ATTN_K_NORM: (
         "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
+        "model.layers.{bid}.self_attn.k_layernorm",                        # persimmon
     ),
 
     MODEL_TENSOR.ROPE_FREQS: (
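
The NOTE in the first hunk asks why n_rot ends up smaller than the head size. A minimal worked sketch of where the division by two comes from, assuming the values from transformers' PersimmonConfig for adept/persimmon-8b-chat (hidden_size 4096, num_attention_heads 64, partial_rotary_factor 0.5); these numbers are not part of this patch:

# Persimmon applies RoPE to only a fraction of each attention head, so the
# rotary dimension count (n_rot) is head_dim scaled by the partial rotary
# factor rather than the full head_dim.
hparams = {
    "hidden_size": 4096,           # assumed from the HF config
    "num_attention_heads": 64,     # assumed from the HF config
    "partial_rotary_factor": 0.5,  # assumed PersimmonConfig default
}

head_dim = hparams["hidden_size"] // hparams["num_attention_heads"]  # 64
n_rot = int(head_dim * hparams["partial_rotary_factor"])             # 32

# Same value as the expression hardcoded in the patch:
assert n_rot == hparams["hidden_size"] // hparams["num_attention_heads"] // 2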
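
On the tensor_mapping.py side: each added "model.layers.{bid}..." entry lets checkpoints using the newer HF persimmon layout resolve to the same GGUF tensor names as the older "language_model.encoder.layers.{bid}..." layout, because TensorNameMap expands the {bid} placeholder for every block index at construction time. A rough usage sketch, assuming gguf-py is importable and using an illustrative block count of 4:

import gguf

# Build the name map; {bid} is expanded to 0..3 internally.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.PERSIMMON, 4)

# Old and new HF tensor names map to the same GGUF name.
old = tmap.get_name("language_model.encoder.layers.0.self_attention.dense.weight",
                    try_suffixes=(".weight", ".bias"))
new = tmap.get_name("model.layers.0.self_attn.dense.weight",
                    try_suffixes=(".weight", ".bias"))
assert old == new  # both resolve to "blk.0.attn_output.weight"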