diff --git a/common/common.cpp b/common/common.cpp
index 8667bf414..388f650ec 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2980,7 +2980,7 @@ bool llama_chat_verify_template(const std::string & tmpl) {
     return res >= 0;
 }
 
-std::string llama_chat_format(const struct llama_model * model,
+std::string llama_chat_apply_template(const struct llama_model * model,
         const std::string & tmpl,
         const std::vector<llama_chat_msg> & msgs,
         bool add_ass) {
@@ -3010,10 +3010,10 @@ std::string llama_chat_format_single(const struct llama_model * model,
         const std::vector<llama_chat_msg> & past_msg,
         const llama_chat_msg & new_msg,
         bool add_ass) {
-    auto fmt_past_msg = llama_chat_format(model, tmpl, past_msg, false);
+    auto fmt_past_msg = llama_chat_apply_template(model, tmpl, past_msg, false);
     std::vector<llama_chat_msg> chat_new(past_msg);
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = llama_chat_format(model, tmpl, chat_new, add_ass);
+    auto fmt_new_msg = llama_chat_apply_template(model, tmpl, chat_new, add_ass);
     auto formatted = fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return formatted;
 }
diff --git a/common/common.h b/common/common.h
index 1e4f1583d..6a64bb22b 100644
--- a/common/common.h
+++ b/common/common.h
@@ -370,7 +370,7 @@ struct llama_chat_msg {
 bool llama_chat_verify_template(const std::string & tmpl);
 
 // CPP wrapper for llama_chat_apply_template
-std::string llama_chat_format(const struct llama_model * model,
+std::string llama_chat_apply_template(const struct llama_model * model,
         const std::string & tmpl,
         const std::vector<llama_chat_msg> & chat,
         bool add_ass);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index f0770ac44..36f060401 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -876,10 +876,8 @@ int main(int argc, char ** argv) {
                 ? chat_add_and_format("user", buffer)
                 : buffer;
             // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
-            bool accept_special_content = params.conversation;
-
             const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-            const auto line_inp = ::llama_tokenize(ctx, user_inp, false, accept_special_content);
+            const auto line_inp = ::llama_tokenize(ctx, user_inp, false, params.conversation);
             const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
             LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
 
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 4eb0c56a3..7ef2a519a 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -127,7 +127,7 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
         chat.push_back({role, content});
     }
 
-    auto formatted_chat = llama_chat_format(model, tmpl, chat, true);
+    auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true);
     LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
     return formatted_chat;
 }
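
Usage sketch (not part of the patch): a minimal example of calling the renamed C++ wrapper after this change. It assumes a loaded llama_model pointer, that llama_chat_msg is an aggregate of role/content strings (as the chat.push_back({role, content}) call above suggests), and that passing an empty template string falls back to the model's built-in chat template.

    // Sketch: build a short conversation and format it with the wrapper
    // renamed in this patch (llama_chat_format -> llama_chat_apply_template).
    #include "common.h"

    #include <string>
    #include <vector>

    static std::string build_prompt(const struct llama_model * model) {
        std::vector<llama_chat_msg> chat = {
            {"system", "You are a helpful assistant."},   // {role, content}
            {"user",   "Hello!"},
        };
        // add_ass = true appends the assistant prefix so generation can start;
        // the empty tmpl is assumed to select the model's own chat template.
        return llama_chat_apply_template(model, "", chat, /* add_ass */ true);
    }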