llama : fix order of pre-tokenizers

This commit is contained in:
Georgi Gerganov 2024-07-19 13:21:38 +03:00 committed by GitHub
parent 7fc85054bf
commit 447c08092d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -5517,14 +5517,14 @@ static void llm_load_vocab(
                 tokenizer_pre == "viking") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_VIKING;
                 vocab.tokenizer_clean_spaces = false;
-            } else if (
-                tokenizer_pre == "jais") {
-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_JAIS;
             } else if (
                 tokenizer_pre == "tekken") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_TEKKEN;
                 vocab.tokenizer_ignore_merges = true;
                 vocab.tokenizer_add_bos = true;
+            } else if (
+                tokenizer_pre == "jais") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_JAIS;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }