set <|endoftext|> as eos and <|user|> as eot
This commit is contained in:
parent
89e8aaf960
commit
9396c7bbaf
2 changed files with 1 addition and 14 deletions
|
@@ -3048,9 +3048,9 @@ class ChatGLMModel(Model):
|
|||
special_vocab.chat_template = "ChatGLM4"
|
||||
special_vocab.merges = merges
|
||||
# only add special tokens when they were not already loaded from config.json
|
||||
# if len(special_vocab.special_token_ids) == 0:
|
||||
special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|endoftext|>"])
|
||||
special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
|
||||
special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
|
||||
# this one is usually not in config.json anyway
|
||||
special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"])
|
||||
special_vocab.add_to_gguf(self.gguf_writer)
|
||||
|
|
13
llama.cpp
13
llama.cpp
|
@@ -18832,19 +18832,6 @@ llama_token_attr llama_token_get_attr(const struct llama_model * model, llama_to
|
|||
}
|
||||
|
||||
bool llama_token_is_eog(const struct llama_model * model, llama_token token) {
|
||||
auto arch_name = llama_model_arch_name(model->arch);
|
||||
auto vocab_type = model->vocab.type;
|
||||
if (strcmp(arch_name, "chatglm") == 0) {
|
||||
if (LLAMA_VOCAB_TYPE_BPE == vocab_type) { // glm4
|
||||
return token != -1 && (
|
||||
token == llama_token_eos(model) ||
|
||||
token == llama_token_eot(model) ||
|
||||
token == 151329 ||
|
||||
token == 151336 ||
|
||||
token == 151338
|
||||
);
|
||||
}
|
||||
}
|
||||
return token != -1 && (
|
||||
token == llama_token_eos(model) ||
|
||||
token == llama_token_eot(model)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue