From c21d5e13fef3562bf364f343e9a2c7533410f514 Mon Sep 17 00:00:00 2001 From: Haoxiang Fei Date: Fri, 10 May 2024 15:24:35 +0800 Subject: [PATCH] fix: llama-3 ignore_merges --- llama.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llama.cpp b/llama.cpp index 2f1123d4e..b73281d03 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12292,6 +12292,20 @@ struct llm_tokenizer_bpe { symbols_final.clear(); for (auto & word : word_collection) { + if (vocab.token_to_id.find(word) != vocab.token_to_id.end()) { + llm_symbol sym; + sym.text = word.c_str(); + sym.n = word.size(); + sym.prev = final_prev_index; + sym.next = -1; + if (final_prev_index != -1) { + symbols_final[final_prev_index].next = symbols_final.size(); + } + symbols_final.emplace_back(sym); + final_prev_index = symbols_final.size() - 1; + continue; + } + work_queue = llm_bigram_bpe::queue(); symbols.clear();