Fix merge conflicts; add rope_ratio handling and register ChatGLMForConditionalGeneration
This commit is contained in:
parent
1dc8e91081
commit
e9e47eb971
2 changed files with 6 additions and 5 deletions
|
@ -2896,7 +2896,7 @@ class T5Model(Model):
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
@Model.register("ChatGLMModel")
|
@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
|
||||||
class ChatGLMModel(Model):
|
class ChatGLMModel(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.CHATGLM
|
model_arch = gguf.MODEL_ARCH.CHATGLM
|
||||||
|
|
||||||
|
@ -3043,7 +3043,6 @@ class ChatGLMModel(Model):
|
||||||
self.gguf_writer.add_tokenizer_pre(tokpre)
|
self.gguf_writer.add_tokenizer_pre(tokpre)
|
||||||
self.gguf_writer.add_token_list(tokens)
|
self.gguf_writer.add_token_list(tokens)
|
||||||
self.gguf_writer.add_token_types(toktypes)
|
self.gguf_writer.add_token_types(toktypes)
|
||||||
self.gguf_writer.add_add_bos_token(False)
|
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
|
special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
|
||||||
special_vocab.chat_template = "chatglm4"
|
special_vocab.chat_template = "chatglm4"
|
||||||
|
@ -3070,6 +3069,8 @@ class ChatGLMModel(Model):
|
||||||
self.gguf_writer.add_file_type(self.ftype)
|
self.gguf_writer.add_file_type(self.ftype)
|
||||||
self.gguf_writer.add_rope_dimension_count(64)
|
self.gguf_writer.add_rope_dimension_count(64)
|
||||||
self.gguf_writer.add_add_bos_token(False)
|
self.gguf_writer.add_add_bos_token(False)
|
||||||
|
self.gguf_writer.add_rope_freq_base(self.hparams.get("rope_ratio", 10000))
|
||||||
|
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
del bid # unused
|
del bid # unused
|
||||||
|
|
|
@ -12281,9 +12281,9 @@ struct llm_build_context {
|
||||||
cb(cur, "ffn_norm", il);
|
cb(cur, "ffn_norm", il);
|
||||||
|
|
||||||
cur = llm_build_ffn(ctx0, cur,
|
cur = llm_build_ffn(ctx0, cur,
|
||||||
model.layers[il].ffn_up, NULL,
|
model.layers[il].ffn_up, NULL, NULL,
|
||||||
NULL, NULL,
|
NULL, NULL, NULL,
|
||||||
model.layers[il].ffn_down, NULL,
|
model.layers[il].ffn_down, NULL, NULL,
|
||||||
NULL,
|
NULL,
|
||||||
LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
|
LLM_FFN_SWIGLU, LLM_FFN_SEQ, cb, il);
|
||||||
cb(cur, "ffn_out", il);
|
cb(cur, "ffn_out", il);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue