diff --git a/llama.h b/llama.h
index e0fe2e0bb..64140bde2 100644
--- a/llama.h
+++ b/llama.h
@@ -706,12 +706,12 @@ extern "C" {
     /// Apply chat template and maybe tokenize it. Inspired by hf apply_chat_template() on python.
     /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
-    /// NOTE: This function only support some know jinja templates. It is not a jinja parser.
+    /// NOTE: This function only supports some known jinja templates. It is not a jinja parser.
     /// @param custom_template A Jinja template to use for this conversion. If this is nullptr, the model’s default chat template will be used instead.
     /// @param msg Pointer to a list of multiple llama_chat_message
     /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
-    /// @return If "tokenize" is set to false, the "buf" must be a string (returned value will be the string length).
-    ///         Otherwise, "buf" must be a list of tokens (returned value will be the number of tokens).
+    /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages).
+    /// @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
     LLAMA_API int32_t llama_chat_apply_template(
               const struct llama_model * model,
                             const char * custom_template,
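
Note: the new @return contract implies a call, check, and possibly re-apply pattern rather than a single call. Below is a minimal sketch of that pattern. The parameters past "custom_template" are not visible in this hunk, so their order and names (message array, count, add_ass, output buffer, buffer length) are inferred from the @param docs above and should be treated as an assumption, not the definitive signature.

#include <cstring>
#include <string>
#include <vector>
#include "llama.h"

// Sketch only: the parameter list past "custom_template" is inferred from the
// @param docs in the hunk (msg array, n_msg, add_ass, buf, buf length).
static std::string format_prompt(const llama_model * model,
                                 const llama_chat_message * msg, size_t n_msg) {
    // Recommended alloc size: 2 * (total number of characters of all messages).
    size_t total_chars = 0;
    for (size_t i = 0; i < n_msg; ++i) {
        total_chars += strlen(msg[i].content);
    }
    std::vector<char> buf(2 * total_chars);

    int32_t n = llama_chat_apply_template(model, nullptr /* use the model's default template */,
                                          msg, n_msg, /*add_ass=*/true,
                                          buf.data(), (int32_t) buf.size());
    if (n > (int32_t) buf.size()) {
        // Formatted prompt did not fit: re-alloc to the reported size and re-apply.
        buf.resize(n);
        n = llama_chat_apply_template(model, nullptr, msg, n_msg, true,
                                      buf.data(), (int32_t) buf.size());
    }
    return n > 0 ? std::string(buf.data(), n) : std::string();
}

The 2x heuristic covers the template's added control tokens for typical chat templates; the second call is only needed when the first one reports a larger size than the buffer provided.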