server : add llama2 chat template (#5425)

* server: add mistral chat template * server: fix typo * server: rename template mistral to llama2 * server: format_llama2: remove BOS * server: validate "--chat-template" argument * server: clean up using_chatml variable Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> --------- Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
2024-02-11 11:16:22 +01:00 · 2024-02-11 11:16:22 +01:00 · 907e08c110
commit 907e08c110
parent f026f8120f
3 changed files with 56 additions and 4 deletions
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -167,6 +167,34 @@ static T json_value(const json &body, const std::string &key, const T &default_v
        : default_value;
 }

+// Render a list of chat messages as a Llama 2 style prompt string.
+//
+// Each message is a JSON object read through json_value():
+//   - "role":    falls back to "user" when json_value() yields its default
+//   - "content": falls back to ""     when json_value() yields its default
+//
+// Output layout:
+//   system    -> "<<SYS>>\n" content "\n<</SYS>>\n\n"  (kept inside the open [INST])
+//   user      -> content " [/INST]"
+//   otherwise -> " " content " </s>"   (treated as a model reply; closes the turn)
+//
+// "[INST] " is written before a message only when no turn is currently open.
+// No BOS token is emitted by this function.
+inline std::string format_llama2(std::vector<json> messages)
+{
+    std::ostringstream output;
+    bool is_inside_turn = false;
+
+    for (const auto & message : messages) {
+        if (!is_inside_turn) {
+            output << "[INST] ";
+        }
+        const std::string role    = json_value(message, "role", std::string("user"));
+        const std::string content = json_value(message, "content", std::string(""));
+        if (role == "system") {
+            // The system block must be terminated with the closing tag "<</SYS>>";
+            // repeating the opening tag "<<SYS>>" leaves the block unterminated.
+            output << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
+            is_inside_turn = true;
+        } else if (role == "user") {
+            output << content << " [/INST]";
+            is_inside_turn = true;
+        } else {
+            // Any other role ends the current turn, so the next message reopens "[INST] ".
+            output << " " << content << " </s>";
+            is_inside_turn = false;
+        }
+    }
+
+    // Materialize the prompt once; it is used for both logging and the return value.
+    std::string text = output.str();
+
+    LOG_VERBOSE("format_llama2", {{"text", text}});
+
+    return text;
+}
+
 inline std::string format_chatml(std::vector<json> messages)
 {
    std::ostringstream chatml_msgs;
@ -180,6 +208,8 @@ inline std::string format_chatml(std::vector<json> messages)

    chatml_msgs << "<|im_start|>assistant" << '\n';

+    LOG_VERBOSE("format_chatml", {{"text", chatml_msgs.str()}});
+
    return chatml_msgs.str();
 }