From efb5dac33707f5166aadc431f2c016a07285159a Mon Sep 17 00:00:00 2001
From: eric8607242
Date: Thu, 27 Jul 2023 16:59:42 +0800
Subject: [PATCH] supporting more diverse tokenizers

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 9a8ecdcf6..8c6395da7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1924,7 +1924,7 @@ struct llama_tokenizer {
         if (token == vocab_.token_to_id.end()) {
             // output any symbols that did not form tokens as bytes.
             for (int j = 0; j < (int) symbol.n; ++j) {
-                llama_vocab::id token_id = static_cast<uint8_t>(symbol.text[j]) + 3;
+                llama_vocab::id token_id = vocab_.token_to_id.at(std::string(1, symbol.text[j]));
                 output.push_back(token_id);
             }
         } else {