add preprocess to chatglm3 and chatglm4

This commit is contained in:
toyer 2024-06-21 07:47:51 +00:00
parent e773174052
commit 4b65b648ce
3 changed files with 33 additions and 14 deletions

View file

@@ -2792,6 +2792,9 @@ class ChatGLMModel(Model):
toktypes.append(toktype)
self.gguf_writer.add_tokenizer_model("llama")
# glm3 needs prefix and suffix formatted as:
# prompt = "[gMASK]sop<|user|>\n" + prompt + "<|assistant|>"
self.gguf_writer.add_tokenizer_pre("chatglm-spm")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)