diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 70ce29f72..c9e6ebf30 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -2896,7 +2896,7 @@ class T5Model(Model):
         return [(self.map_tensor_name(name), data_torch)]
 
 
-@Model.register("ChatGLMModel")
+@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(Model):
     model_arch = gguf.MODEL_ARCH.CHATGLM
 
@@ -3043,7 +3043,6 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
-        self.gguf_writer.add_add_bos_token(False)
 
         special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
         special_vocab.chat_template = "chatglm4"
@@ -3070,6 +3069,8 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_rope_dimension_count(64)
         self.gguf_writer.add_add_bos_token(False)
+        self.gguf_writer.add_rope_freq_base(self.hparams.get("rope_ratio", 10000))
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 9230d8982..ad17d5ab5 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -12281,9 +12281,9 @@ struct llm_build_context {
             cb(cur, "ffn_norm", il);
 
             cur = llm_build_ffn(ctx0, cur,
-                    model.layers[il].ffn_up,   NULL,
-                    NULL,                      NULL,
-                    model.layers[il].ffn_down, NULL,
+                    model.layers[il].ffn_up,   NULL, NULL,
+                    NULL,                      NULL, NULL,
+                    model.layers[il].ffn_down, NULL, NULL,
                     NULL,
                     LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);
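Note for reviewers: the sketch below (not part of the patch) shows how the new add_rope_freq_base line resolves its value. It assumes a GLM-4-style config.json in the working directory; hparams stands in for the converter's self.hparams dict.

    import json

    # Stand-in for the converter's self.hparams (the HF config.json contents).
    with open("config.json") as f:  # hypothetical local checkpoint config
        hparams = json.load(f)

    # Mirrors the patched line: if the checkpoint defines "rope_ratio"
    # (as GLM-4 configs do), that value is written to GGUF as the RoPE
    # frequency base; otherwise the fallback of 10000 is used.
    rope_freq_base = hparams.get("rope_ratio", 10000)
    print("rope_freq_base ->", rope_freq_base)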