diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d7fb61812..c7b05cb16 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2605,7 +2605,7 @@ int main(int argc, char ** argv) {
     // if a custom chat template is not supplied, we will use the one that comes with the model (if any)
     if (params.chat_template.empty()) {
         if (!ctx_server.validate_model_chat_template()) {
-            LOG_ERROR("The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
+            LOG_WARNING("The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
            params.chat_template = "chatml";
        }
    }
@@ -2967,11 +2967,17 @@ int main(int argc, char ** argv) {
    };

    const auto handle_props = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
+        std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+        std::string template_key = "tokenizer.chat_template", curr_tmpl;
+        if (llama_model_meta_val_str(ctx_server.model, template_key.c_str(), model_template.data(), model_template.size()) > 0) {
+            curr_tmpl = std::string(model_template.data(), model_template.size());
+        }
        res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
        json data = {
            { "system_prompt",               ctx_server.system_prompt.c_str() },
            { "default_generation_settings", ctx_server.default_generation_settings_for_props },
-            { "total_slots",                 ctx_server.params.n_parallel }
+            { "total_slots",                 ctx_server.params.n_parallel },
+            { "model_template",              curr_tmpl.c_str() }
        };

        res.set_content(data.dump(), "application/json; charset=utf-8");