diff --git a/llama.cpp b/llama.cpp index 0dca86f67..faa5dc582 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4228,16 +4228,6 @@ static void llm_load_vocab( if (add_space_prefix_keyidx != -1) { vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx); } // The default value of add_space_prefix is true. - } else if (tokenizer_name == "bert") { - vocab.type = LLAMA_VOCAB_TYPE_WPM; - - // default special tokens - vocab.special_bos_id = 101; - vocab.special_eos_id = 102; - vocab.special_unk_id = 100; - vocab.special_sep_id = -1; - vocab.special_pad_id = -1; - vocab.add_space_prefix = false; } else { if (tokenizer_name == "gpt2") { vocab.type = LLAMA_VOCAB_TYPE_BPE; @@ -4245,6 +4235,16 @@ static void llm_load_vocab( vocab.type = LLAMA_VOCAB_TYPE_DEEPSEEKCODER; } else if (tokenizer_name == "deepseek_llm") { vocab.type = LLAMA_VOCAB_TYPE_DEEPSEEKLLM; + } else if (tokenizer_name == "bert") { + vocab.type = LLAMA_VOCAB_TYPE_WPM; + + // default special tokens + vocab.special_bos_id = 101; + vocab.special_eos_id = 102; + vocab.special_unk_id = 100; + vocab.special_sep_id = -1; + vocab.special_pad_id = -1; + vocab.add_space_prefix = false; } else { LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_name.c_str()); LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);