diff --git a/llama.cpp b/llama.cpp index 2f1123d4e..b73281d03 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12292,6 +12292,20 @@ struct llm_tokenizer_bpe { symbols_final.clear(); for (auto & word : word_collection) { + if (vocab.token_to_id.find(word) != vocab.token_to_id.end()) { + llm_symbol sym; + sym.text = word.c_str(); + sym.n = word.size(); + sym.prev = final_prev_index; + sym.next = -1; + if (final_prev_index != -1) { + symbols_final[final_prev_index].next = symbols_final.size(); + } + symbols_final.emplace_back(sym); + final_prev_index = symbols_final.size() - 1; + continue; + } + work_queue = llm_bigram_bpe::queue(); symbols.clear();