diff --git a/llama.h b/llama.h
index e0fe2e0bb..64140bde2 100644
--- a/llama.h
+++ b/llama.h
@@ -706,12 +706,12 @@ extern "C" {
     /// Apply chat template and maybe tokenize it. Inspired by hf apply_chat_template() on python.
     /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
-    /// NOTE: This function only support some know jinja templates. It is not a jinja parser.
+    /// NOTE: This function only supports some known jinja templates. It is not a jinja parser.
     /// @param custom_template A Jinja template to use for this conversion. If this is nullptr, the model’s default chat template will be used instead.
     /// @param msg Pointer to a list of multiple llama_chat_message
     /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
-    /// @return If "tokenize" is set to false, the "buf" must be a string (returned value will be the string length).
-    ///         Otherwise, "buf" must be a list of tokens (returned value will be the number of tokens).
+    /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages).
+    /// @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
     LLAMA_API int32_t llama_chat_apply_template(
               const struct llama_model * model,
                             const char * custom_template,
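
Note: the new @return contract implies a call, check, and possibly re-apply pattern rather than a single call. Below is a minimal sketch of that pattern. The parameters past "custom_template" are not visible in this hunk, so their order and names (message array, count, add_ass, output buffer, buffer length) are inferred from the @param docs above and should be treated as an assumption, not the definitive signature.

#include <cstring>
#include <string>
#include <vector>
#include "llama.h"

// Sketch only: the parameter list past "custom_template" is inferred from the
// @param docs in the hunk (msg array, n_msg, add_ass, buf, buf length).
static std::string format_prompt(const llama_model * model,
                                 const llama_chat_message * msg, size_t n_msg) {
    // Recommended alloc size: 2 * (total number of characters of all messages).
    size_t total_chars = 0;
    for (size_t i = 0; i < n_msg; ++i) {
        total_chars += strlen(msg[i].content);
    }
    std::vector<char> buf(2 * total_chars);

    int32_t n = llama_chat_apply_template(model, nullptr /* use the model's default template */,
                                          msg, n_msg, /*add_ass=*/true,
                                          buf.data(), (int32_t) buf.size());
    if (n > (int32_t) buf.size()) {
        // Formatted prompt did not fit: re-alloc to the reported size and re-apply.
        buf.resize(n);
        n = llama_chat_apply_template(model, nullptr, msg, n_msg, true,
                                      buf.data(), (int32_t) buf.size());
    }
    return n > 0 ? std::string(buf.data(), n) : std::string();
}

The 2x heuristic covers the template's added control tokens for typical chat templates; the second call is only needed when the first one reports a larger size than the buffer provided.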