Fix tokenizer_clean_spaces for megrez
commit 01a0c36e04
parent a02c63d710

1 changed file with 4 additions and 2 deletions
@@ -6604,8 +6604,7 @@ static void llm_load_vocab(
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
                 vocab.tokenizer_clean_spaces = false;
             } else if (
-                tokenizer_pre == "qwen2" ||
-                tokenizer_pre == "megrez") {
+                tokenizer_pre == "qwen2") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
                 vocab.tokenizer_clean_spaces = false;
             } else if (
@@ -6665,6 +6664,9 @@ static void llm_load_vocab(
             } else if (
                 tokenizer_pre == "minerva-7b") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MINERVA;
+            } else if (
+                tokenizer_pre == "megrez") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_QWEN2;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
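Net effect of the change: "megrez" still maps to LLAMA_VOCAB_PRE_TYPE_QWEN2, but it no longer falls through the qwen2 branch that sets vocab.tokenizer_clean_spaces = false, so megrez keeps the vocab's default clean-spaces behavior. For illustration only, here is a minimal standalone sketch of the HF-style clean_up_tokenization_spaces pass that a flag like tokenizer_clean_spaces typically gates during detokenization. This is not llama.cpp's actual detokenizer; the clean_spaces helper and its rule list below are hypothetical.

// Hypothetical sketch: the kind of cleanup a tokenizer_clean_spaces flag
// gates during detokenization (HF-style clean_up_tokenization_spaces).
// Not llama.cpp code; clean_spaces and its rule list are made up here.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

static std::string clean_spaces(std::string text) {
    // Drop the space a BPE detokenizer leaves before punctuation and
    // English contraction pieces.
    static const std::vector<std::pair<std::string, std::string>> rules = {
        {" .", "."}, {" ,", ","}, {" !", "!"}, {" ?", "?"},
        {" n't", "n't"}, {" 's", "'s"}, {" 'm", "'m"},
        {" 've", "'ve"}, {" 're", "'re"},
    };
    for (const auto & rule : rules) {
        for (size_t pos = 0; (pos = text.find(rule.first, pos)) != std::string::npos; ) {
            text.replace(pos, rule.first.size(), rule.second);
            pos += rule.second.size();
        }
    }
    return text;
}

int main() {
    // Raw piece concatenation, as a detokenizer might produce it.
    const std::string raw = "we 've seen it , have n't we ?";
    std::cout << "clean_spaces = false: " << raw << "\n";
    std::cout << "clean_spaces = true : " << clean_spaces(raw) << "\n";
    // false -> "we 've seen it , have n't we ?"
    // true  -> "we've seen it, haven't we?"
}

The sketch only shows what the flag means in general; which rules llama.cpp applies, and whether megrez should clean spaces at all, is exactly what this commit adjusts.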