diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 1a2212502..a7102b296 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -149,7 +149,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
         res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
     }

-    const std::string formatted_chat(buf.data(), res);
+    const std::string formatted_chat(buf.data());

     LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});

diff --git a/llama.cpp b/llama.cpp
index 76e0367b8..a42235234 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17106,7 +17106,7 @@ LLAMA_API int32_t llama_chat_get_model_template(
         return -1;
     } else {
         snprintf(buf, length, "%s", model_template.c_str());
-        return model_template.size() + 1;
+        return model_template.size();
     }
 }

@@ -17162,7 +17162,7 @@ LLAMA_API llama_chat_template llama_chat_get_template_type(const char * tmpl) {
 }

 LLAMA_API int32_t llama_chat_get_prefix(
-        const llama_chat_template tmpl,
+        const llama_chat_template ttmpl,
         const char * role,
         const char * prev_role,
         char * buf,
@@ -17170,6 +17170,7 @@ LLAMA_API int32_t llama_chat_get_prefix(
     std::stringstream ss;
     std::string srole(role);
     std::string sprev_role(prev_role == nullptr ? "" : prev_role);
+    // str_toupper converts a string to all upper case, example: "abc" ==> "ABC"
     auto str_toupper = [](std::string & str) {
         std::string output(str);
         for (size_t i = 0; i < output.size(); i++) {
@@ -17177,12 +17178,14 @@ LLAMA_API int32_t llama_chat_get_prefix(
         }
         return output;
     };
+    // str_tofirstcap transforms the first letter to uppercase, example: "abc" ==> "Abc"
     auto str_tofirstcap = [](std::string & str) {
         std::string output(str);
         output[0] = toupper(output[0]);
         return output;
     };
-    switch (tmpl) {
+    // ttmpl means "typed template"
+    switch (ttmpl) {
         case LLAMA_CHAT_TEMPLATE_NOT_SUPPORTED:
             return -1;
         case LLAMA_CHAT_TEMPLATE_CHATML:
@@ -17267,7 +17270,7 @@ LLAMA_API int32_t llama_chat_get_prefix(
 }

 LLAMA_API int32_t llama_chat_get_postfix(
-        const llama_chat_template tmpl,
+        const llama_chat_template ttmpl,
         const char * role,
         const char * prev_role,
         char * buf,
@@ -17275,7 +17278,7 @@ LLAMA_API int32_t llama_chat_get_postfix(
     std::stringstream ss;
     std::string srole(role);
     std::string sprev_role(prev_role == nullptr ? "" : prev_role);
-    switch (tmpl) {
+    switch (ttmpl) {
         case LLAMA_CHAT_TEMPLATE_NOT_SUPPORTED:
             return -1;
         case LLAMA_CHAT_TEMPLATE_CHATML:
@@ -17345,8 +17348,8 @@ LLAMA_API int32_t llama_chat_get_postfix(
     return output.size();
 }

-LLAMA_API bool llama_chat_support_system_message(const llama_chat_template tmpl) {
-    switch (tmpl) {
+LLAMA_API bool llama_chat_support_system_message(const llama_chat_template ttmpl) {
+    switch (ttmpl) {
         case LLAMA_CHAT_TEMPLATE_CHATML:
         case LLAMA_CHAT_TEMPLATE_LLAMA2_SYS_BOS:
         case LLAMA_CHAT_TEMPLATE_LLAMA2_SYS:
@@ -17371,6 +17374,8 @@ LLAMA_API int32_t llama_chat_apply_template(
         bool add_ass,
         char * buf,
         int32_t length) {
+    // either model or tmpl must be given
+    GGML_ASSERT(model != nullptr || tmpl != nullptr);
     std::string curr_tmpl(tmpl == nullptr ? "" : tmpl);
     if (tmpl == nullptr) {
         std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
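For context, here is a minimal sketch of how the renamed prefix/postfix getters could be driven from caller code. Everything below is illustrative, not part of the patch: the `wrap_message` helper is hypothetical, the 256-byte buffer is an arbitrary assumption, and it presumes these `LLAMA_API` functions are declared in `llama.h` with the signatures shown above (a negative return value signals an unsupported template, a non-negative one the formatted length):

```cpp
#include "llama.h" // assumed to declare the chat template API from this diff

#include <cstdint>
#include <string>
#include <vector>

// Hypothetical helper (not part of the patch): wrap one message in the
// role-dependent prefix/postfix produced by the renamed getters.
static std::string wrap_message(const char * tmpl_name,
                                const char * role,
                                const char * prev_role,
                                const std::string & content) {
    // resolve the template string (e.g. "chatml") to the typed enum
    const llama_chat_template ttmpl = llama_chat_get_template_type(tmpl_name);

    std::vector<char> buf(256, 0); // assumption: large enough for any prefix/postfix

    int32_t n = llama_chat_get_prefix(ttmpl, role, prev_role, buf.data(), (int32_t) buf.size());
    if (n < 0) {
        return content; // LLAMA_CHAT_TEMPLATE_NOT_SUPPORTED
    }
    std::string out(buf.data(), n);

    out += content;

    n = llama_chat_get_postfix(ttmpl, role, prev_role, buf.data(), (int32_t) buf.size());
    if (n < 0) {
        return content;
    }
    out.append(buf.data(), n);
    return out;
}
```

A caller could additionally consult `llama_chat_support_system_message(ttmpl)` before emitting a `system` turn, and merge the system prompt into the first user message when it returns false.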