diff --git a/llama.cpp b/llama.cpp index bb5d35af8..889269161 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2242,23 +2242,23 @@ static void llm_load_vocab( // special tokens { - const std::vector> special_token_types = { - { LLM_KV_TOKENIZER_BOS_ID, &vocab.special_bos_id }, - { LLM_KV_TOKENIZER_EOS_ID, &vocab.special_eos_id }, - { LLM_KV_TOKENIZER_UNK_ID, &vocab.special_unk_id }, - { LLM_KV_TOKENIZER_SEP_ID, &vocab.special_sep_id }, - { LLM_KV_TOKENIZER_PAD_ID, &vocab.special_pad_id }, + const std::vector> special_token_types = { + { LLM_KV_TOKENIZER_BOS_ID, vocab.special_bos_id }, + { LLM_KV_TOKENIZER_EOS_ID, vocab.special_eos_id }, + { LLM_KV_TOKENIZER_UNK_ID, vocab.special_unk_id }, + { LLM_KV_TOKENIZER_SEP_ID, vocab.special_sep_id }, + { LLM_KV_TOKENIZER_PAD_ID, vocab.special_pad_id }, }; - for (auto & it : special_token_types ) { - int32_t id = -1; - const std::string kstr = kv(std::get<0>(it)); + for (const auto & it : special_token_types ) { + const std::string key = kv(std::get<0>(it)); + int32_t & id = std::get<1>(it), old_id = id; - GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kstr); + GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, key); if (id != -1 && (id < 0 || size_t(id) >= vocab.id_to_token.size())) { - LLAMA_LOG_WARN("%s: bad special token value %d for key '%s' -- ignoring\n", __func__, id, kstr.c_str()); - continue; + LLAMA_LOG_WARN("%s: bad special token: '%s' = %d, using default id %d\n", + __func__, key.c_str(), id, old_id); + id = old_id; } - *(std::get<1>(it)) = id; } }