diff --git a/common/common.cpp b/common/common.cpp
index 8667bf414..388f650ec 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2980,7 +2980,7 @@ bool llama_chat_verify_template(const std::string & tmpl) {
     return res >= 0;
 }
 
-std::string llama_chat_format(const struct llama_model * model,
+std::string llama_chat_apply_template(const struct llama_model * model,
         const std::string & tmpl,
         const std::vector<llama_chat_msg> & msgs,
         bool add_ass) {
@@ -3010,10 +3010,10 @@ std::string llama_chat_format_single(const struct llama_model * model,
         const std::vector<llama_chat_msg> & past_msg,
         const llama_chat_msg & new_msg,
         bool add_ass) {
-    auto fmt_past_msg = llama_chat_format(model, tmpl, past_msg, false);
+    auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false);
     std::vector<llama_chat_msg> chat_new(past_msg);
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = llama_chat_format(model, tmpl, chat_new, add_ass);
+    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass);
     auto formatted = fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return formatted;
 }
diff --git a/common/common.h b/common/common.h
index 1e4f1583d..6a64bb22b 100644
--- a/common/common.h
+++ b/common/common.h
@@ -370,7 +370,7 @@ struct llama_chat_msg {
 bool llama_chat_verify_template(const std::string & tmpl);
 
 // CPP wrapper for llama_chat_apply_template
-std::string llama_chat_format(const struct llama_model * model,
+std::string llama_chat_apply_template(const struct llama_model * model,
         const std::string & tmpl,
         const std::vector<llama_chat_msg> & chat,
         bool add_ass);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index f0770ac44..36f060401 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -876,10 +876,8 @@ int main(int argc, char ** argv) {
                 ? chat_add_and_format("user", buffer)
                 : buffer;
             // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
-            bool accept_special_content = params.conversation;
-
             const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-            const auto line_inp = ::llama_tokenize(ctx, user_inp, false, accept_special_content);
+            const auto line_inp = ::llama_tokenize(ctx, user_inp, false, params.conversation);
             const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
             LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
 
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 4eb0c56a3..7ef2a519a 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -127,7 +127,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
         chat.push_back({role, content});
     }
 
-    auto formatted_chat = llama_chat_format(model, tmpl, chat, true);
+    auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true);
     LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
     return formatted_chat;
 }
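
Usage sketch (not part of the patch): a minimal example of calling the renamed C++ wrapper after this change. It assumes a loaded llama_model pointer, that llama_chat_msg is an aggregate of role/content strings (as the chat.push_back({role, content}) call above suggests), and that passing an empty template string falls back to the model's built-in chat template.

    // Sketch: build a short conversation and format it with the wrapper
    // renamed in this patch (llama_chat_format -> llama_chat_apply_template).
    #include "common.h"

    #include <string>
    #include <vector>

    static std::string build_prompt(const struct llama_model * model) {
        std::vector<llama_chat_msg> chat = {
            {"system", "You are a helpful assistant."},   // {role, content}
            {"user",   "Hello!"},
        };
        // add_ass = true appends the assistant prefix so generation can start;
        // the empty tmpl is assumed to select the model's own chat template.
        return llama_chat_apply_template(model, "", chat, /* add_ass */ true);
    }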