diff --git a/llama.cpp b/llama.cpp index 8c6395da7..fdb7ba1bb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1924,6 +1924,8 @@ struct llama_tokenizer { if (token == vocab_.token_to_id.end()) { // output any symbols that did not form tokens as bytes. for (int j = 0; j < (int) symbol.n; ++j) { + // NOTE: old version, before #2420 - not sure what the implications of this are + //llama_vocab::id token_id = static_cast<uint8_t>(symbol.text[j]) + 3; llama_vocab::id token_id = vocab_.token_to_id.at(std::string(1, symbol.text[j])); output.push_back(token_id); }