fix conflicts and add rope_ratio & ChatGLMForConditionalGeneration

toyer 2024-06-27 06:27:35 +00:00
parent 1dc8e91081
commit e9e47eb971
2 changed files with 6 additions and 5 deletions


@@ -2896,7 +2896,7 @@ class T5Model(Model):
         return [(self.map_tensor_name(name), data_torch)]


-@Model.register("ChatGLMModel")
+@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(Model):
     model_arch = gguf.MODEL_ARCH.CHATGLM
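Registering the class under a second name lets checkpoints whose config.json lists ChatGLMForConditionalGeneration in architectures resolve to the same ChatGLMModel converter as before. As a rough illustration only (the registry dict and function names below are assumptions, not taken from this diff), a multi-name registration decorator of this kind can be sketched as:

    from typing import Callable

    _model_classes: dict[str, type] = {}

    def register(*names: str) -> Callable[[type], type]:
        def wrapper(cls: type) -> type:
            # Map every HF architecture string to the same converter class,
            # so both names pick up ChatGLMModel during conversion.
            for name in names:
                _model_classes[name] = cls
            return cls
        return wrapper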
@@ -3043,7 +3043,6 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
-        self.gguf_writer.add_add_bos_token(False)

         special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
         special_vocab.chat_template = "chatglm4"
@@ -3070,6 +3069,8 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_rope_dimension_count(64)
         self.gguf_writer.add_add_bos_token(False)
+        self.gguf_writer.add_rope_freq_base(self.hparams.get("rope_ratio", 10000))

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
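The added line writes the GGUF RoPE frequency base straight from the loaded HF hyperparameters, falling back to 10000 when the checkpoint defines no rope_ratio. A standalone sketch of the same lookup, assuming the hyperparameters come from the checkpoint's config.json (the helper name and default here are illustrative, not part of the converter):

    import json
    from pathlib import Path

    def rope_freq_base(model_dir: str, default: float = 10000.0) -> float:
        # Same fallback as the converter: take rope_ratio from config.json
        # if the checkpoint provides it, otherwise keep the 10000 default.
        hparams = json.loads((Path(model_dir) / "config.json").read_text())
        return float(hparams.get("rope_ratio", default))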


@@ -12281,9 +12281,9 @@ struct llm_build_context {
             cb(cur, "ffn_norm", il);

             cur = llm_build_ffn(ctx0, cur,
-                    model.layers[il].ffn_up,   NULL,
-                    NULL,                      NULL,
-                    model.layers[il].ffn_down, NULL,
+                    model.layers[il].ffn_up,   NULL, NULL,
+                    NULL,                      NULL, NULL,
+                    model.layers[il].ffn_down, NULL, NULL,
                     NULL,
                     LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);