From d92f518253a5eaed36bcf98aefd99a716a5e5b08 Mon Sep 17 00:00:00 2001
From: MaggotHATE
Date: Thu, 21 Nov 2024 22:46:44 +0500
Subject: [PATCH] Simplify logic even further

* if no `chat_template` is passed, we can rely on the
  `common_chat_apply_template` function

---
 examples/server/server.cpp |  1 -
 examples/server/utils.hpp  | 30 ++++++++++++++----------------
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index a7a86548b..5ccc3ae0d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -3226,7 +3226,6 @@ int main(int argc, char ** argv) {
             LOG_WRN("%s: Prefix and suffix will be used for a custom chat template. This may cause the model to output suboptimal responses\n", __func__);
         } else if (!ctx_server.validate_model_chat_template()) {
             LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
-            params.chat_template = "chatml";
         }
     } else if (!params.input_prefix.empty() || !params.input_suffix.empty()) {
         LOG_WRN("%s: Prefix and suffix are defined, but will not be used because a chat template '%s' is chosen.\n", __func__, params.chat_template.c_str());
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 1d89a8262..affb1d669 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -304,7 +304,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
     std::vector<common_chat_msg> chat;
     std::string formatted_chat;
 
-    bool is_custom = !prefix.empty() || !suffix.empty();
+    bool is_custom = tmpl.empty() && (!prefix.empty() || !suffix.empty());
 
     for (size_t i = 0; i < messages.size(); ++i) {
         const auto & curr_msg = messages[i];
@@ -337,7 +337,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
         }
     }
 
-    if (!is_custom) formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
+    if (!is_custom) {
+        LOG_WRN("Using '%s' template, prefix and suffix are ignored.\n", tmpl.c_str());
+        formatted_chat = common_chat_apply_template(model, tmpl, chat, true);
+    } else {
+        LOG_WRN("Used prefix '%s' and suffix '%s'.\n", prefix.c_str(), suffix.c_str());
+    }
+
     LOG_DBG("formatted_chat using '%s': '%s'\n", tmpl.c_str(), formatted_chat.c_str());
 
     return formatted_chat;
@@ -353,7 +359,7 @@ inline std::string format_chat_example(const struct llama_model * model, const s
 
     std::string formatted_example;
 
-    if (!prefix.empty() || !suffix.empty()) {
+    if (tmpl.empty() && (!prefix.empty() || !suffix.empty())) {
         for (auto message : msgs) {
             if (message.role == "user") formatted_example += prefix + message.content + suffix;
             else formatted_example += message.content;
@@ -640,20 +646,12 @@ static json oaicompat_completion_params_parse(
     std::string prefix = (body.contains("input_prefix") ? body.at("input_prefix").get<std::string>() : "");
     std::string suffix = (body.contains("input_suffix") ? body.at("input_suffix").get<std::string>() : "");
 
-    // if template is sent in data, ignore prefix and suffix
-    if (!chat_tmpl.empty()) {
-        LOG_WRN("\nUsing '%s' template, prefix and suffix are ignored.\n", chat_tmpl.c_str());
-        prefix = "";
-        suffix = "";
-    } else {
-        if (prefix.empty()) {
-            prefix = input_prefix;
-        }
+    if (prefix.empty()) {
+        prefix = input_prefix;
+    }
 
-        if (suffix.empty()) {
-            suffix = input_suffix;
-        }
-        LOG_WRN("\nUsing prefix '%s' and suffix '%s'.\n", prefix.c_str(), suffix.c_str());
+    if (suffix.empty()) {
+        suffix = input_suffix;
     }
 
     llama_params["prompt"] = format_chat(model, chat_tmpl, prefix, suffix, body.at("messages"));