diff --git a/examples/server/oai.hpp b/examples/server/oai.hpp index c0c1f521e..2199df237 100644 --- a/examples/server/oai.hpp +++ b/examples/server/oai.hpp @@ -21,8 +21,8 @@ inline static json oaicompat_completion_params_parse( json llama_params; bool using_chatml = chat_template == "chatml"; std::string formatted_prompt = using_chatml - ? format_chatml(body["messages"]) // OpenAI 'messages' to chatml - : format_mistral(body["messages"]); // OpenAI 'messages' to mistral format + ? format_chatml(body["messages"]) // OpenAI 'messages' to chatml (with <|im_start|>,...) + : format_llama2(body["messages"]); // OpenAI 'messages' to llama2 (with [INST],...) llama_params["__oaicompat"] = true; diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2183abcb1..d4161ed92 100644 @@ -1861,7 +1861,7 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms, printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`"); printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`"); printf(" --chat-template FORMAT_NAME"); - printf(" set chat template, possible valus is: mistral, chatml (default %s)", sparams.chat_template.c_str()); + printf(" set chat template, possible values are: llama2, chatml (default %s)", sparams.chat_template.c_str()); printf("\n"); } diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 5ec743fb8..b1623c737 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -167,7 +167,7 @@ static T json_value(const json &body, const std::string &key, const T &default_v : default_value; } -inline std::string format_mistral(std::vector messages) +inline std::string format_llama2(std::vector 
messages) { std::ostringstream output; bool is_inside_turn = false; @@ -190,7 +190,7 @@ inline std::string format_mistral(std::vector messages) } } - LOG_VERBOSE("format_mistral", {{"text", output.str()}}); + LOG_VERBOSE("format_llama2", {{"text", output.str()}}); return output.str(); }