diff --git a/llama.cpp b/llama.cpp
index d1bbf7f85..676c085da 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17303,7 +17303,7 @@ LLAMA_API int32_t llama_chat_get_model_template(
         return -1;
     } else {
         snprintf(buf, length, "%s", model_template.c_str());
-        return model_template.size();
+        return model_template.size() + 1;
     }
 }
 
@@ -17469,7 +17469,7 @@ LLAMA_API int32_t llama_chat_get_prefix(
     }
     std::string output = ss.str();
     snprintf(buf, length, "%s", output.c_str());
-    return output.size();
+    return output.size() + 1;
 }
 
 LLAMA_API int32_t llama_chat_get_postfix(
@@ -17551,7 +17551,7 @@ LLAMA_API int32_t llama_chat_get_postfix(
     }
     std::string output = ss.str();
     snprintf(buf, length, "%s", output.c_str());
-    return output.size();
+    return output.size() + 1;
 }
 
 LLAMA_API bool llama_chat_support_system_message(const enum llama_chat_template ttmpl) {
@@ -17641,7 +17641,7 @@ LLAMA_API int32_t llama_chat_apply_template(
     if (buf && length > 0) {
         snprintf(buf, length, "%s", output.c_str());
     }
-    return output.size();
+    return output.size() + 1;
 }
 
 LLAMA_API int llama_split_path(char * split_path, size_t maxlen, const char * path_prefix, int split_no, int split_count) {
diff --git a/llama.h b/llama.h
index 6705ca323..cc25869a0 100644
--- a/llama.h
+++ b/llama.h
@@ -870,7 +870,7 @@ extern "C" {
     /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
     /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
     /// @param length The size of the allocated buffer
-    /// @return The total number of bytes of the formatted prompt. If is it larger than the size of buffer, you may need to re-alloc it and then re-apply the template.
+    /// @return The total number of bytes of the formatted prompt (null terminator included). If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
     LLAMA_API int32_t llama_chat_apply_template(
               const struct llama_model * model,
                             const char * tmpl,
@@ -885,7 +885,7 @@ extern "C" {
     /// @param name Template name (can be a nullptr for default template). See: https://github.com/ggerganov/llama.cpp/pull/6588
     /// @param buf The output buffer
     /// @param length The size of the allocated buffer
-    /// @return The total number of bytes of the template. If a named template cannot be found, it will use default template. If no template can be found, it returns -1
+    /// @return The total number of bytes of the template (null terminator included). If a named template cannot be found, it will use the default template. If no template can be found, it returns -1
     LLAMA_API int32_t llama_chat_get_model_template(
               const struct llama_model * model,
                             const char * name,
@@ -903,7 +903,7 @@ extern "C" {
     /// @param prev_role The role of the previous message, can be nullptr
     /// @param buf The output buffer
     /// @param length The size of the allocated buffer
-    /// @return The total number of bytes of the output string
+    /// @return The total number of bytes of the output string (null terminator included)
     LLAMA_API int32_t llama_chat_get_prefix(
         const enum llama_chat_template tmpl,
                             const char * role,
@@ -917,7 +917,7 @@ extern "C" {
     /// @param prev_role The role of the previous message, can be nullptr
     /// @param buf The output buffer
     /// @param length The size of the allocated buffer
-    /// @return The total number of bytes of the output string
+    /// @return The total number of bytes of the output string (null terminator included)
     LLAMA_API int32_t llama_chat_get_postfix(
         const enum llama_chat_template tmpl,
                             const char * role,
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index 9f1f9e505..c22f8d838 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -103,7 +103,7 @@ int main(void) {
             formatted_chat.size()
         );
         formatted_chat.resize(res);
-        std::string output(formatted_chat.data(), formatted_chat.size());
+        std::string output(formatted_chat.data());
         std::cout << output << "\n-------------------------\n";
         assert(output == expected);
     }
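
For reference, a minimal caller-side sketch of the re-alloc-and-retry pattern the llama.h comments describe, under the new convention that the returned byte count includes the null terminator. The `format_chat` helper and the buffer-growth logic are illustrative only, not part of the patch:

```cpp
#include "llama.h"

#include <cstring>
#include <string>
#include <vector>

// Hypothetical helper: formats a chat with the model's built-in template and
// grows the buffer when the first pass does not fit. Assumes the post-patch
// return value, which counts the trailing '\0'.
static std::string format_chat(const struct llama_model * model,
                               const std::vector<llama_chat_message> & chat) {
    // Recommended starting size: 2 * (total number of characters of all messages).
    size_t alloc_size = 0;
    for (const auto & msg : chat) {
        alloc_size += 2 * std::strlen(msg.content);
    }
    std::vector<char> buf(alloc_size);

    // nullptr template -> use the chat template stored in the model's metadata.
    int32_t res = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                            /*add_ass=*/true, buf.data(), (int32_t) buf.size());
    if (res > (int32_t) buf.size()) {
        // The formatted prompt (plus '\0') did not fit: re-alloc and re-apply.
        buf.resize(res);
        res = llama_chat_apply_template(model, nullptr, chat.data(), chat.size(),
                                        /*add_ass=*/true, buf.data(), (int32_t) buf.size());
    }
    // res includes the null terminator, so the prompt itself is res - 1 characters.
    return res > 0 ? std::string(buf.data(), res - 1) : std::string();
}
```

The second `llama_chat_apply_template` call only runs when the first one reports that the output was truncated; dropping the final byte when building the `std::string` mirrors what the updated test does by letting the embedded terminator delimit `formatted_chat.data()`.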