fix conflicts and add rope_ratio & ChatGLMForConditionalGeneration

toyer 2024-06-27 06:27:35 +00:00
parent 1dc8e91081
commit e9e47eb971
2 changed files with 6 additions and 5 deletions


@@ -2896,7 +2896,7 @@ class T5Model(Model):
         return [(self.map_tensor_name(name), data_torch)]


-@Model.register("ChatGLMModel")
+@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(Model):
     model_arch = gguf.MODEL_ARCH.CHATGLM
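Registering the class under a second name lets checkpoints whose config.json lists ChatGLMForConditionalGeneration in architectures resolve to the same ChatGLMModel converter as before. As a rough illustration only (the registry dict and function names below are assumptions, not taken from this diff), a multi-name registration decorator of this kind can be sketched as:

    from typing import Callable

    _model_classes: dict[str, type] = {}

    def register(*names: str) -> Callable[[type], type]:
        def wrapper(cls: type) -> type:
            # Map every HF architecture string to the same converter class,
            # so both names pick up ChatGLMModel during conversion.
            for name in names:
                _model_classes[name] = cls
            return cls
        return wrapper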
@@ -3043,7 +3043,6 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
-        self.gguf_writer.add_add_bos_token(False)

         special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
         special_vocab.chat_template = "chatglm4"
@@ -3070,6 +3069,8 @@ class ChatGLMModel(Model):
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_rope_dimension_count(64)
         self.gguf_writer.add_add_bos_token(False)
+        self.gguf_writer.add_rope_freq_base(self.hparams.get("rope_ratio", 10000))

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
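The added line writes the GGUF RoPE frequency base straight from the loaded HF hyperparameters, falling back to 10000 when the checkpoint defines no rope_ratio. A standalone sketch of the same lookup, assuming the hyperparameters come from the checkpoint's config.json (the helper name and default here are illustrative, not part of the converter):

    import json
    from pathlib import Path

    def rope_freq_base(model_dir: str, default: float = 10000.0) -> float:
        # Same fallback as the converter: take rope_ratio from config.json
        # if the checkpoint provides it, otherwise keep the 10000 default.
        hparams = json.loads((Path(model_dir) / "config.json").read_text())
        return float(hparams.get("rope_ratio", default))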


@@ -12281,9 +12281,9 @@ struct llm_build_context {
             cb(cur, "ffn_norm", il);

             cur = llm_build_ffn(ctx0, cur,
-                    model.layers[il].ffn_up,   NULL,
-                    NULL,                      NULL,
-                    model.layers[il].ffn_down, NULL,
+                    model.layers[il].ffn_up,   NULL, NULL,
+                    NULL,                      NULL, NULL,
+                    model.layers[il].ffn_down, NULL, NULL,
                     NULL,
                     LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);