From f5b78255957918017caea7834410d3e0789cb2de Mon Sep 17 00:00:00 2001
From: ochafik
Date: Thu, 31 Oct 2024 13:52:46 +0000
Subject: [PATCH] `tool-call`: code_interpreter & system + tool call support for all jinja templates!

---
 common/chat-template.hpp                      |  74 +++++++++--
 common/tool-call.cpp                          | 119 +++++++++++-----
 examples/server/tests/features/steps/steps.py |  33 ++++-
 .../server/tests/features/tool_call.feature   |  54 ++++----
 scripts/update_jinja_goldens.py               |   9 --
 ...I-c4ai-command-r-plus-default-tool_use.txt |  49 ++++++++
 ...rmes-2-Pro-Llama-3-8B-default-tool_use.txt |  73 +++++++++++
 ...rmes-2-Pro-Mistral-7B-default-tool_use.txt |  73 +++++++++++
 ...Hermes-3-Llama-3.1-8B-default-tool_use.txt |  75 +++++++++++
 .../OrionStarAI-Orion-14B-Chat-system.txt     |   3 +-
 .../OrionStarAI-Orion-14B-Chat-tool_use.txt   |  61 +++++++++
 .../Qwen-Qwen2-7B-Instruct-tool_use.txt       |  75 +++++++++++
 .../Qwen-Qwen2-VL-7B-Instruct-tool_use.txt    |  75 +++++++++++
 ...Bloke-FusionNet_34Bx2_MoE-AWQ-tool_use.txt |  49 ++++++++
 ...t-Metamath-OrcaVicuna-Mistral-tool_use.txt |  49 ++++++++
 ...ofenghuang-vigogne-2-70b-chat-tool_use.txt |  53 ++++++++
 ...ai-DeepSeek-Coder-V2-Instruct-tool_use.txt |  61 +++++++++
 .../deepseek-ai-DeepSeek-V2.5-tool_use.txt    |  49 ++++++++
 ...i-deepseek-coder-33b-instruct-tool_use.txt |  80 ++++++++++++
 .../goldens/google-gemma-2-2b-it-system.txt   |   6 +
 .../goldens/google-gemma-2-2b-it-tool_use.txt |  73 +++++++++++
 .../goldens/google-gemma-7b-it-system.txt     |   6 +
 .../goldens/google-gemma-7b-it-tool_use.txt   |  73 +++++++++++
 ...-MiniCPM-3B-OpenHermes-2.5-v2-tool_use.txt |  49 ++++++++
 ...rosoft-Phi-3-medium-4k-instruct-system.txt |   1 +
 ...soft-Phi-3-medium-4k-instruct-tool_use.txt |  72 +++++++++++
 ...rosoft-Phi-3-mini-4k-instruct-tool_use.txt |  73 +++++++++++
 ...osoft-Phi-3-small-8k-instruct-tool_use.txt |  73 +++++++++++
 ...crosoft-Phi-3.5-mini-instruct-tool_use.txt |  73 +++++++++++
 ...osoft-Phi-3.5-vision-instruct-tool_use.txt |  72 +++++++++++
 ...alai-Mistral-7B-Instruct-v0.2-tool_use.txt |  49 ++++++++
 ...ai-Mixtral-8x7B-Instruct-v0.1-tool_use.txt |  49 ++++++++
 .../mlabonne-AlphaMonarch-7B-tool_use.txt     |  73 +++++++++++
 .../openchat-openchat-3.5-0106-tool_use.txt   |  49 ++++++++
 ...ium-OpenHermes-2.5-Mistral-7B-tool_use.txt |  73 +++++++++++
 tests/test-tool-call.cpp                      |  24 +++-
 36 files changed, 1919 insertions(+), 83 deletions(-)
 create mode 100644 tests/chat/goldens/CohereForAI-c4ai-command-r-plus-default-tool_use.txt
 create mode 100644 tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-default-tool_use.txt
 create mode 100644 tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-default-tool_use.txt
 create mode 100644 tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-8B-default-tool_use.txt
 create mode 100644 tests/chat/goldens/OrionStarAI-Orion-14B-Chat-tool_use.txt
 create mode 100644 tests/chat/goldens/Qwen-Qwen2-7B-Instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/Qwen-Qwen2-VL-7B-Instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/TheBloke-FusionNet_34Bx2_MoE-AWQ-tool_use.txt
 create mode 100644 tests/chat/goldens/abacusai-Fewshot-Metamath-OrcaVicuna-Mistral-tool_use.txt
 create mode 100644 tests/chat/goldens/bofenghuang-vigogne-2-70b-chat-tool_use.txt
 create mode 100644 tests/chat/goldens/deepseek-ai-DeepSeek-Coder-V2-Instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/deepseek-ai-DeepSeek-V2.5-tool_use.txt
 create mode 100644 tests/chat/goldens/deepseek-ai-deepseek-coder-33b-instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/google-gemma-2-2b-it-system.txt
 create mode 100644 tests/chat/goldens/google-gemma-2-2b-it-tool_use.txt
 create mode 100644 tests/chat/goldens/google-gemma-7b-it-system.txt
 create mode 100644 tests/chat/goldens/google-gemma-7b-it-tool_use.txt
 create mode 100644 tests/chat/goldens/indischepartij-MiniCPM-3B-OpenHermes-2.5-v2-tool_use.txt
 create mode 100644 tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/microsoft-Phi-3-mini-4k-instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/microsoft-Phi-3-small-8k-instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/microsoft-Phi-3.5-mini-instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/microsoft-Phi-3.5-vision-instruct-tool_use.txt
 create mode 100644 tests/chat/goldens/mistralai-Mistral-7B-Instruct-v0.2-tool_use.txt
 create mode 100644 tests/chat/goldens/mistralai-Mixtral-8x7B-Instruct-v0.1-tool_use.txt
 create mode 100644 tests/chat/goldens/mlabonne-AlphaMonarch-7B-tool_use.txt
 create mode 100644 tests/chat/goldens/openchat-openchat-3.5-0106-tool_use.txt
 create mode 100644 tests/chat/goldens/teknium-OpenHermes-2.5-Mistral-7B-tool_use.txt

diff --git a/common/chat-template.hpp b/common/chat-template.hpp
index 4dd381cef..1e58a7d1f 100644
--- a/common/chat-template.hpp
+++ b/common/chat-template.hpp
@@ -83,11 +83,13 @@ class chat_template {
         bool add_generation_prompt,
         const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
     {
-        auto actual_messages = messages;
+        json actual_messages;
 
         // First, "fix" messages so they have a chance to be rendered correctly by the template
-        if (_requires_object_arguments || !_supports_system_role) {
+        if (_requires_object_arguments || !_supports_system_role || !_supports_tools) {
+            actual_messages = json::array();
+
             std::string pending_system;
             auto flush_sys = [&]() {
                 if (!pending_system.empty()) {
@@ -98,12 +100,66 @@ class chat_template {
                     pending_system.clear();
                 }
             };
-            for (auto & message : actual_messages) {
+            for (const auto & message_ : messages) {
+                auto message = message_;
                 if (!message.contains("role") || !message.contains("content")) {
                     throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
                 }
                 std::string role = message.at("role");
 
+                if (message.contains("tool_calls")) {
+                    if (_requires_object_arguments || !_supports_tools) {
+                        for (auto & tool_call : message.at("tool_calls")) {
+                            if (tool_call["type"] == "function") {
+                                auto & function = tool_call.at("function");
+                                std::string arguments = function.at("arguments");
+                                function["arguments"] = json::parse(arguments);
+                            }
+                        }
+                    }
+                    if (!_supports_tools) {
+                        auto content = message.at("content");
+                        auto tool_calls = json::array();
+                        for (const auto & tool_call : message.at("tool_calls")) {
+                            if (tool_call.at("type") != "function") {
+                                continue;
+                            }
+                            const auto & function = tool_call.at("function");
+                            auto tc = json {
+                                {"name", function.at("name")},
+                                {"arguments", function.at("arguments")},
+                            };
+                            if (tool_call.contains("id")) {
+                                tc["id"] = tool_call["id"];
+                            }
+                            tool_calls.push_back(tc);
+                        }
+                        auto obj = json {
+                            {"tool_calls", tool_calls},
+                        };
+                        if (!content.is_null() && content != "") {
+                            obj["content"] = content;
+                        }
+                        message["content"] = obj.dump(2);
+                        message.erase("tool_calls");
+                    }
+                }
+                if (!_supports_tools && role == "tool") {
+                    message["role"] = "user";
+                    auto obj = json {
+                        {"tool_response", {
+                            {"tool", message.at("name")},
+                            {"content", message.at("content")},
+                        }},
+                    };
+                    if (message.contains("tool_call_id")) {
+                        obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
+                    }
+                    message["content"] = obj.dump(2);
+                    message.erase("name");
+                }
+
+                // std::string content = message["content"];
                 if (!message["content"].is_null() && !_supports_system_role) {
                     std::string content = message.at("content");
                     if (role == "system") {
@@ -121,17 +177,11 @@ class chat_template {
                         }
                     }
                 }
-                if (_requires_object_arguments && message.contains("tool_calls")) {
-                    for (auto & tool_call : message.at("tool_calls")) {
-                        if (tool_call["type"] == "function") {
-                            auto & function = tool_call.at("function");
-                            std::string arguments = function.at("arguments");
-                            function["arguments"] = json::parse(arguments);
-                        }
-                    }
-                }
+                actual_messages.push_back(message);
             }
             flush_sys();
+        } else {
+            actual_messages = messages;
         }
 
         auto context = minja::Context::make(json({
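[Editorial note, not part of the patch: the hunk above is the heart of the "tool call support for all jinja templates" claim. When a template supports neither a `tool` role nor a `tool_calls` field, assistant tool calls get folded into a JSON blob in `content`, and `tool` messages get re-rolled as `user` messages wrapping a `tool_response` object — exactly the shapes the new golden files below assert. A minimal standalone sketch of that folding follows; `fold_message` is a hypothetical name (the real logic is inline in `chat_template::apply`), but the JSON shapes match the patch.]

// fold_message_demo.cpp — editorial sketch, assuming nlohmann/json is available.
// Build: g++ -std=c++17 fold_message_demo.cpp -o fold_message_demo
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

// For templates with no native tool support, fold tool calls / tool results
// into plain string content that any chat template can render.
static json fold_message(json message) {
    if (message.contains("tool_calls")) {
        auto tool_calls = json::array();
        for (const auto & tc : message.at("tool_calls")) {
            if (tc.at("type") != "function") {
                continue;  // only function-style calls are folded
            }
            json entry = {
                {"name",      tc.at("function").at("name")},
                {"arguments", tc.at("function").at("arguments")},
            };
            if (tc.contains("id")) {
                entry["id"] = tc["id"];
            }
            tool_calls.push_back(entry);
        }
        // Serialize the calls as pretty-printed JSON text content.
        message["content"] = json {{"tool_calls", tool_calls}}.dump(2);
        message.erase("tool_calls");
    } else if (message.at("role") == "tool") {
        // Re-roll a tool result as a user message wrapping a tool_response object.
        json obj = {{"tool_response", {
            {"tool",    message.at("name")},
            {"content", message.at("content")},
        }}};
        message["role"]    = "user";
        message["content"] = obj.dump(2);
        message.erase("name");
    }
    return message;
}

int main() {
    json call = {
        {"type", "function"},
        {"id", "call_1___"},
        {"function", {
            {"name", "python"},
            {"arguments", {{"code", "print('Hello, World!')"}}},
        }},
    };
    json msg = {
        {"role", "assistant"},
        {"content", nullptr},
        {"tool_calls", json::array({call})},
    };
    json folded = fold_message(msg);
    // Prints the {"tool_calls": [...]} blob seen in the goldens below.
    std::cout << folded["content"].get<std::string>() << std::endl;
}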
tool_call["id"] : "", }); } } else if (data.contains("tool_call")) { @@ -307,7 +348,7 @@ static llama_tool_calls parse_mistral_nemo_tool_calls(const std::string& input) } llama_tool_calls parse_tool_calls(llama_tool_call_style style, const json & tools, const std::string& input) { - // fprintf(stderr, "# parse_tool_calls:\n\n%s\n\n", input.c_str()); + // fprintf(stderr, "# parse_tool_calls(%s):\n\n%s\n\n", llama_tool_call_style_name(style).c_str(), input.c_str()); switch (style) { case llama_tool_call_style::None: return {input, {}}; @@ -361,15 +402,13 @@ llama_tool_call_handler llama_tool_call_handler_init( handler.prompt = tmpl.apply(messages, tools, /* add_generation_prompt= */ true); break; case llama_tool_call_style::Generic: { + auto actual_tools = normalize_tools(tools); auto tool_call_schemas = json::array(); - for (const auto & tool : tools) { - if (tool["type"] != "function") { - continue; - } + for (const auto & tool : actual_tools) { const auto & function = tool["function"]; std::string name = function["name"]; auto parameters = function["parameters"]; - tool_call_schemas.emplace_back(json { + auto tool_schema = json { {"type", "object"}, {"properties", { {"name", { @@ -379,7 +418,18 @@ llama_tool_call_handler llama_tool_call_handler_init( {"arguments", parameters}, }}, {"required", json::array({"name", "arguments"})}, - }); + }; + if (function.contains("description")) { + tool_schema["description"] = function["description"]; + } + if (parallel) { + tool_schema["properties"]["id"] = { + {"type", "string"}, + {"minLength", 4}, + }; + tool_schema["required"].push_back("id"); + } + tool_call_schemas.emplace_back(tool_schema); } const auto tool_call = parallel @@ -424,16 +474,14 @@ llama_tool_call_handler llama_tool_call_handler_init( auto tweaked_messages = add_system( messages, "Respond in JSON format, either with a request to call tools or with a response to the user's request. Here is the schema for all responses:\n\n```json\n" + schema.dump(2) + "\n```"); - handler.prompt = tmpl.apply(tweaked_messages, tools, /* add_generation_prompt= */ true); + handler.prompt = tmpl.apply(tweaked_messages, actual_tools.empty() ? json() : actual_tools, /* add_generation_prompt= */ true); break; } case llama_tool_call_style::MistralNemo: { + auto actual_tools = normalize_tools(tools); handler.grammar = build_grammar([&](const llama_grammar_builder & builder) { auto schemas = json::array(); - for (const auto & tool : tools) { - if (tool["type"] != "function") { - continue; - } + for (const auto & tool : actual_tools) { const auto & function = tool["function"]; std::string name = function["name"]; auto parameters = function["parameters"]; @@ -472,12 +520,22 @@ llama_tool_call_handler llama_tool_call_handler_init( handler.grammar_trigger_words.push_back("[{\"arguments\":"); } // auto tweaked_messages = add_system(messages, "You are a helpful AI with tool calling capabilities. Prefix any tool calls with [TOOL_CALLS]"); - handler.prompt = tmpl.apply(messages, tools, /* add_generation_prompt= */ true); + handler.prompt = tmpl.apply(messages, actual_tools.empty() ? 
json() : actual_tools, /* add_generation_prompt= */ true); break; } case llama_tool_call_style::Llama31: case llama_tool_call_style::Llama32: { - static auto builtin_tools = json {"wolfram_alpha", "brave_search", "code_interpreter"}; + auto builtin_tools = json {"wolfram_alpha", "brave_search"}; + for (const auto & tool : tools) { + if (!tool.contains("type")) { + continue; + } + if (tool["type"] == "code_interpreter") { + builtin_tools.push_back("code_interpreter"); + break; + } + } + auto actual_tools = normalize_tools(tools); auto uses_python_tag = style == llama_tool_call_style::Llama31; @@ -490,7 +548,7 @@ llama_tool_call_handler llama_tool_call_handler_init( handler.grammar = build_grammar([&](const llama_grammar_builder & builder) { std::vector tool_rules; - for (const auto & tool : tools) { + for (const auto & tool : actual_tools) { const auto & function = tool["function"]; std::string name = function["name"]; auto parameters = function["parameters"]; @@ -531,7 +589,7 @@ llama_tool_call_handler llama_tool_call_handler_init( builder.add_rule("root", join(tool_rules.begin(), tool_rules.end(), " | ")); }); handler.additional_stop_words.push_back("<|eom_id|>"); - handler.prompt = tmpl.apply(messages, tools, /* add_generation_prompt= */ true, { + handler.prompt = tmpl.apply(messages, actual_tools.empty() ? json() : actual_tools, /* add_generation_prompt= */ true, { {"builtin_tools", builtin_tools}, }); break; @@ -539,20 +597,20 @@ llama_tool_call_handler llama_tool_call_handler_init( case llama_tool_call_style::FunctionaryV3Llama3: { // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar + auto actual_tools = normalize_tools(tools); handler.grammar = build_grammar([&](const llama_grammar_builder & builder) { std::vector first_tool_rules; std::vector subsequent_tool_rules; - for (size_t i = 0, n = tools.size(); i < n; i++) { - auto & tool = tools[i]; + for (const auto & tool : actual_tools) { const auto & function = tool["function"]; std::string name = function["name"]; auto parameters = function["parameters"]; auto args_rule = builder.add_schema(name + "-args", parameters); first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule)); - subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule)); + subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\"\\n>>>" + name + "\\n\" " + args_rule)); if (allow_content) { handler.grammar_trigger_words.push_back(name + "\n"); - handler.grammar_trigger_words.push_back(">>>" + name + "\n"); + handler.grammar_trigger_words.push_back("\n>>>" + name + "\n"); } } auto first_rule = builder.add_rule("first_tool_call", join(first_tool_rules.begin(), first_tool_rules.end(), " | ")) + " space"; @@ -563,7 +621,7 @@ llama_tool_call_handler llama_tool_call_handler_init( builder.add_rule("root", first_rule); } }); - handler.prompt = tmpl.apply(messages, tools, /* add_generation_prompt= */ true); + handler.prompt = tmpl.apply(messages, actual_tools.empty() ? 
json() : actual_tools, /* add_generation_prompt= */ true); // handler.parser = parse_functionary_3_2_tool_calls; break; } @@ -571,10 +629,10 @@ llama_tool_call_handler llama_tool_call_handler_init( // ./tests/chat/templates/meetkai-functionary-medium-v3.1.jinja // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt // TODO: handle tool {type: code_interpreter} as python + auto actual_tools = normalize_tools(tools); handler.grammar = build_grammar([&](const llama_grammar_builder & builder) { std::vector tool_rules; - for (size_t i = 0, n = tools.size(); i < n; i++) { - auto & tool = tools[i]; + for (const auto & tool : actual_tools) { const auto & function = tool["function"]; std::string name = function["name"]; auto parameters = function["parameters"]; @@ -593,16 +651,17 @@ llama_tool_call_handler llama_tool_call_handler_init( handler.grammar_trigger_words.push_back("{"name": "foo", "arguments": {"a": 1}})* + auto actual_tools = normalize_tools(tools); handler.grammar = build_grammar([&](const llama_grammar_builder & builder) { std::vector tool_rules; - for (const auto & tool : tools) { + for (const auto & tool : actual_tools) { const auto & function = tool["function"]; std::string name = function["name"]; auto parameters = function["parameters"]; @@ -623,7 +682,7 @@ llama_tool_call_handler llama_tool_call_handler_init( handler.grammar_trigger_words.push_back(""); } }); - handler.prompt = tmpl.apply(messages, tools, /* add_generation_prompt= */ true); + handler.prompt = tmpl.apply(messages, actual_tools.empty() ? json() : actual_tools, /* add_generation_prompt= */ true); break; } default: diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index e922d8ec0..a990a07cf 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -451,14 +451,14 @@ def step_python_tool(context): context.tools.append({ "type": "function", "function": { - "name": "ipython", - "description": "Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.", + "name": "python", + "description": "Runs code in a Python interpreter and returns the result of the execution after 60 seconds.", "parameters": { "type": "object", "properties": { "code": { "type": "string", - "description": "The code to run in the ipython interpreter." + "description": "The code to run in the Python interpreter." 
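[Editorial note, not part of the patch: every tool-call style above now funnels its tools through the new `normalize_tools()` helper, which is what makes `{"type": "code_interpreter"}` work uniformly — it expands into the built-in "python" function tool before grammars and prompts are built. The sketch below replays that mapping as a standalone program so the expansion is visible; `normalize_tools_demo` is a local copy, since the real helper is file-static.]

// tools_normalization_demo.cpp — editorial sketch, assuming nlohmann/json is available.
#include <iostream>
#include <nlohmann/json.hpp>

using json = nlohmann::ordered_json;

// Local mirror of the patch's normalize_tools(): expand the code_interpreter
// pseudo-tool into the built-in "python" function tool, pass function tools
// through unchanged, and drop anything else.
static json normalize_tools_demo(const json & tools) {
    static const auto python_tool = json::parse(R"({
        "type": "function",
        "function": {
            "name": "python",
            "description": "Runs code in a Python interpreter and returns the result of the execution after 60 seconds.",
            "parameters": {
                "type": "object",
                "properties": {
                    "code": {"type": "string", "description": "The code to run in the Python interpreter."}
                },
                "required": ["code"]
            }
        }
    })");
    auto results = json::array();
    for (const auto & tool : tools) {
        if (!tool.contains("type")) {
            continue;
        }
        if (tool["type"] == "code_interpreter") {
            results.push_back(python_tool);   // expand the pseudo-tool
        } else if (tool["type"] == "function") {
            results.push_back(tool);          // pass through as-is
        }                                     // anything else is dropped
    }
    return results;
}

int main() {
    json tools = json::parse(R"([{"type": "code_interpreter"}])");
    // Prints a single "python" function tool with a required "code" parameter,
    // which is why the tests below can expect "Then tool python is called".
    std::cout << normalize_tools_demo(tools).dump(2) << std::endl;
}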
} }, "required": ["code"] @@ -466,6 +466,33 @@ def step_python_tool(context): } }) + +@step('test tool') +def step_python_tool(context): + if not context.tools: + context.tools = [] + context.tools.append( + { + "type":"function", + "function": { + "name": "test", + "description": "", + "parameters": { + "type": "object", + "properties": {} + } + } + } + ) + +@step('code_interpreter tool') +def step_python_tool(context): + if not context.tools: + context.tools = [] + context.tools.append({ + "type": "code_interpreter", + }) + @step('a tool choice {tool_choice}') def step_tool_choice(context, tool_choice): context.tool_choice = tool_choice diff --git a/examples/server/tests/features/tool_call.feature b/examples/server/tests/features/tool_call.feature index 611375f1d..c1d72b35f 100644 --- a/examples/server/tests/features/tool_call.feature +++ b/examples/server/tests/features/tool_call.feature @@ -23,27 +23,27 @@ Feature: llama.cpp server And max tokens to predict And a user prompt say hello world with python And a tool choice required - And tools + And tool And parallel tool calls is And an OAI compatible chat completions request with no api error Then tool is called with arguments Examples: Prompts - | template_name | n_predict | tool_name | tool_arguments | tools | parallel_tool_calls | - | meetkai-functionary-medium-v3.1 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | meetkai-functionary-medium-v3.1 | 128 | ipython | {"code": "it and said, \"I'm sorry, Lily. It's a spectork.\" said, \"I'm sorry, Lily.\"\nThen, a little girl named Lily came to the park and saw a big, shiny flower. She was so happy and said, \"I'm sorry, Lily. It's a spectork.\"\nThey did"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - | meetkai-functionary-medium-v3.2 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | meetkai-functionary-medium-v3.2 | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | test | {} | [{"type":"function", 
"function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | meta-llama-Meta-Llama-3.1-8B-Instruct | 64 | ipython | {"code": "it and realed at the otter. Asked Dave Daisy, Daisy is a big, shiny blue. As"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - | meta-llama-Llama-3.2-3B-Instruct | 64 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | meta-llama-Llama-3.2-3B-Instruct | 64 | ipython | {"code": "Yes,"} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | - | mistralai-Mistral-Nemo-Instruct-2407 | 128 | test | {} | [{"type":"function", "function": {"name": "test", "description": "", "parameters": {"type": "object", "properties": {}}}}] | disabled | - | mistralai-Mistral-Nemo-Instruct-2407 | 128 | ipython | {"code": "It's a spector."} | [{"type":"function", "function": {"name": "ipython", "description": "", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": ""}}, "required": ["code"]}}}] | disabled | + | template_name | n_predict | tool_name | tool_arguments | parallel_tool_calls | + | meetkai-functionary-medium-v3.1 | 32 | test | {} | disabled | + | meetkai-functionary-medium-v3.1 | 32 | python | {"code": ". She was so excited to go to the park and s"} | disabled | + | meetkai-functionary-medium-v3.2 | 32 | test | {} | disabled | + | meetkai-functionary-medium-v3.2 | 32 | python | {"code": "Yes,"} | disabled | + | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | 128 | test | {} | disabled | + | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | 128 | python | {"code": "Yes,"} | disabled | + | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | test | {} | disabled | + | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | 128 | python | {"code": "Yes,"} | disabled | + | meta-llama-Meta-Llama-3.1-8B-Instruct | 128 | test | {} | disabled | + | meta-llama-Meta-Llama-3.1-8B-Instruct | 128 | python | {"code": "It's a shark."} | disabled | + | meta-llama-Llama-3.2-3B-Instruct | 128 | test | {} | disabled | + | meta-llama-Llama-3.2-3B-Instruct | 128 | python | {"code": "It's a shark."} | disabled | + | mistralai-Mistral-Nemo-Instruct-2407 | 128 | test | {} | disabled | + | mistralai-Mistral-Nemo-Instruct-2407 | 128 | python | {"code": "It's a small cost."} | disabled | Scenario Outline: Template + tinystories model yields no tool call @@ -79,7 +79,7 @@ Feature: llama.cpp server @slow - Scenario Outline: Python hello world w/ + tool yields ipython call + Scenario Outline: Python hello world w/ + tool yields python call Given a model file from HF repo And a test chat template file named And no warmup @@ -91,20 +91,30 @@ Feature: llama.cpp server And tool And parallel tool calls is disabled And an OAI compatible chat completions request with no api error - Then tool ipython is called with arguments + Then tool python is called with arguments Examples: Prompts | tool | tool_arguments | hf_repo | hf_file | template_override | + | python | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | | | python | {"code": "print('Hello, World!')"} | 
bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q4_K_M.gguf | | | python | {"code": "print(\"Hello World\")"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | | | python | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | | - | python | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | | + | python | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | | python | {"code": "print('hello world')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | - | python | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct | - | python | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct | + | python | {"code": "print('Hello, World!'}"} | bartowski/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct | + | python | {"code": "print("} | bartowski/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct | | python | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf | | - # | python | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | | + | code_interpreter | {"code": "print('Hello, world!')"} | bartowski/gemma-2-2b-it-GGUF | gemma-2-2b-it-Q4_K_M.gguf | | + | code_interpreter | {"code": "print('Hello, World!')"} | bartowski/Mistral-Nemo-Instruct-2407-GGUF | Mistral-Nemo-Instruct-2407-Q4_K_M.gguf | mistralai-Mistral-Nemo-Instruct-2407 | + | code_interpreter | {"code": "print(\"Hello World\")"} | bartowski/Qwen2.5-7B-Instruct-GGUF | Qwen2.5-7B-Instruct-Q4_K_M.gguf | | + | code_interpreter | {"code": "print('Hello, World!')"} | bartowski/Phi-3.5-mini-instruct-GGUF | Phi-3.5-mini-instruct-Q4_K_M.gguf | | + | code_interpreter | {"code": "print('Hello, world!')"} | NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF | Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use | + | code_interpreter | {"code": "print('hello world')"} | NousResearch/Hermes-3-Llama-3.1-8B-GGUF | Hermes-3-Llama-3.1-8B.Q4_K_M.gguf | NousResearch-Hermes-3-Llama-3.1-8B-tool_use | + | code_interpreter | {"code": "print('Hello, World!'}"} | lmstudio-community/Llama-3.2-1B-Instruct-GGUF | Llama-3.2-1B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct | + | code_interpreter | {"code": "print("} | lmstudio-community/Llama-3.2-3B-Instruct-GGUF | Llama-3.2-3B-Instruct-Q4_K_M.gguf | meta-llama-Llama-3.2-3B-Instruct | + | code_interpreter | {"code": "print("} | lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF | Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf | | # | python | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 | + # | code_interpreter | {"code": "print('Hello, World!')"} | bartowski/functionary-small-v3.2-GGUF | functionary-small-v3.2-Q8_0.gguf | meetkai-functionary-medium-v3.2 | @slow diff --git a/scripts/update_jinja_goldens.py b/scripts/update_jinja_goldens.py index 902c0eefe..74795f679 
100644
--- a/scripts/update_jinja_goldens.py
+++ b/scripts/update_jinja_goldens.py
@@ -108,9 +108,6 @@ def handle_chat_template(model_id, variant, template_src):
     env.globals['raise_exception'] = raise_exception
     env.globals['strftime_now'] = strftime_now
 
-    template_handles_tools = 'tools' in template_src
-    template_hates_the_system = 'System role not supported' in template_src
-
     template = env.from_string(template_src)
 
     context_files = glob.glob('tests/chat/contexts/*.json')
@@ -119,12 +116,6 @@ def handle_chat_template(model_id, variant, template_src):
         with open(context_file, 'r') as f:
             context = json.load(f)
 
-        if not template_handles_tools and 'tools' in context:
-            continue
-
-        if template_hates_the_system and any(m['role'] == 'system' for m in context['messages']):
-            continue
-
         output_file = f'tests/chat/goldens/{base_name}-{context_name}.txt'
         logger.info(f"- {output_file}")
 
diff --git a/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-default-tool_use.txt b/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-default-tool_use.txt
new file mode 100644
index 000000000..2a537c411
--- /dev/null
+++ b/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-default-tool_use.txt
@@ -0,0 +1,49 @@
+<|startoftext|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Print a hello world message with python.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{
+  "tool_calls": [
+    {
+      "name": "ipython",
+      "arguments": {
+        "code": "print('Hello, World!')"
+      },
+      "id": "call_1___"
+    }
+  ]
+}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{
+  "tool_response": {
+    "tool": "ipython",
+    "content": "{\"stdout\": \"Hello, World!\"}",
+    "tool_call_id": "call_1___"
+  }
+}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Anything else?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Test a tautology.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{
+  "tool_calls": [
+    {
+      "name": "test",
+      "arguments": {
+        "condition": true
+      },
+      "id": "call_2___"
+    }
+  ]
+}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{
+  "tool_response": {
+    "tool": "test",
+    "content": "true",
+    "tool_call_id": "call_2___"
+  }
+}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Truth is definitely true.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Check it on the web.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{
+  "tool_calls": [
+    {
+      "name": "brave_search",
+      "arguments": {
+        "query": "what is truth anyway am I right?"
+      },
+      "id": "call_3___"
+    }
+  ]
+}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{
+  "tool_response": {
+    "tool": "brave_search",
+    "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}",
+    "tool_call_id": "call_3___"
+  }
+}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I don't need the web to answer you but I did check, as you asked.
What now?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> \ No newline at end of file diff --git a/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-default-tool_use.txt b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-default-tool_use.txt new file mode 100644 index 000000000..76e34c6d5 --- /dev/null +++ b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-default-tool_use.txt @@ -0,0 +1,73 @@ +<|startoftext|><|im_start|>user +Print a hello world message with python.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|im_end|> +<|im_start|>assistant +Anything else?<|im_end|> +<|im_start|>user +Test a tautology.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|im_end|> +<|im_start|>assistant +Truth is definitely true.<|im_end|> +<|im_start|>user +Check it on the web.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|im_end|> +<|im_start|>assistant +I don't need the web to answer you but I did check, as you asked. What now?<|im_end|> +<|im_start|>assistant diff --git a/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-default-tool_use.txt b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-default-tool_use.txt new file mode 100644 index 000000000..76e34c6d5 --- /dev/null +++ b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-default-tool_use.txt @@ -0,0 +1,73 @@ +<|startoftext|><|im_start|>user +Print a hello world message with python.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|im_end|> +<|im_start|>assistant +Anything else?<|im_end|> +<|im_start|>user +Test a tautology.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|im_end|> +<|im_start|>assistant +Truth is definitely true.<|im_end|> +<|im_start|>user +Check it on the web.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" 
+ }, + "id": "call_3___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|im_end|> +<|im_start|>assistant +I don't need the web to answer you but I did check, as you asked. What now?<|im_end|> +<|im_start|>assistant diff --git a/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-8B-default-tool_use.txt b/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-8B-default-tool_use.txt new file mode 100644 index 000000000..c4cdd733e --- /dev/null +++ b/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-8B-default-tool_use.txt @@ -0,0 +1,75 @@ +<|startoftext|><|im_start|>system +You are a helpful assistant.<|im_end|> +<|im_start|>user +Print a hello world message with python.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|im_end|> +<|im_start|>assistant +Anything else?<|im_end|> +<|im_start|>user +Test a tautology.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|im_end|> +<|im_start|>assistant +Truth is definitely true.<|im_end|> +<|im_start|>user +Check it on the web.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|im_end|> +<|im_start|>assistant +I don't need the web to answer you but I did check, as you asked. What now?<|im_end|> +<|im_start|>assistant diff --git a/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-system.txt b/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-system.txt index def765b1c..c61225b0a 100644 --- a/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-system.txt +++ b/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-system.txt @@ -1,3 +1,4 @@ -<|startoftext|>Human: What's your favourite LLM framework? +<|startoftext|>Human: You only tell the truth. +What's your favourite LLM framework? Assistant: <|endoftext|>llama.cpp!<|endoftext|> \ No newline at end of file diff --git a/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-tool_use.txt b/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-tool_use.txt new file mode 100644 index 000000000..bfed688eb --- /dev/null +++ b/tests/chat/goldens/OrionStarAI-Orion-14B-Chat-tool_use.txt @@ -0,0 +1,61 @@ +<|startoftext|>Human: Print a hello world message with python. + +Assistant: <|endoftext|>{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|endoftext|>Human: { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} + +Assistant: <|endoftext|>Anything else?<|endoftext|>Human: Test a tautology. 
+ +Assistant: <|endoftext|>{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|endoftext|>Human: { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} + +Assistant: <|endoftext|>Truth is definitely true.<|endoftext|>Human: Check it on the web. + +Assistant: <|endoftext|>{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|endoftext|>Human: { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} + +Assistant: <|endoftext|>I don't need the web to answer you but I did check, as you asked. What now?<|endoftext|> \ No newline at end of file diff --git a/tests/chat/goldens/Qwen-Qwen2-7B-Instruct-tool_use.txt b/tests/chat/goldens/Qwen-Qwen2-7B-Instruct-tool_use.txt new file mode 100644 index 000000000..0b5830955 --- /dev/null +++ b/tests/chat/goldens/Qwen-Qwen2-7B-Instruct-tool_use.txt @@ -0,0 +1,75 @@ +<|im_start|>system +You are a helpful assistant.<|im_end|> +<|im_start|>user +Print a hello world message with python.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|im_end|> +<|im_start|>assistant +Anything else?<|im_end|> +<|im_start|>user +Test a tautology.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|im_end|> +<|im_start|>assistant +Truth is definitely true.<|im_end|> +<|im_start|>user +Check it on the web.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|im_end|> +<|im_start|>assistant +I don't need the web to answer you but I did check, as you asked. 
What now?<|im_end|> +<|im_start|>assistant diff --git a/tests/chat/goldens/Qwen-Qwen2-VL-7B-Instruct-tool_use.txt b/tests/chat/goldens/Qwen-Qwen2-VL-7B-Instruct-tool_use.txt new file mode 100644 index 000000000..0b5830955 --- /dev/null +++ b/tests/chat/goldens/Qwen-Qwen2-VL-7B-Instruct-tool_use.txt @@ -0,0 +1,75 @@ +<|im_start|>system +You are a helpful assistant.<|im_end|> +<|im_start|>user +Print a hello world message with python.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|im_end|> +<|im_start|>assistant +Anything else?<|im_end|> +<|im_start|>user +Test a tautology.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|im_end|> +<|im_start|>assistant +Truth is definitely true.<|im_end|> +<|im_start|>user +Check it on the web.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|im_end|> +<|im_start|>assistant +I don't need the web to answer you but I did check, as you asked. What now?<|im_end|> +<|im_start|>assistant diff --git a/tests/chat/goldens/TheBloke-FusionNet_34Bx2_MoE-AWQ-tool_use.txt b/tests/chat/goldens/TheBloke-FusionNet_34Bx2_MoE-AWQ-tool_use.txt new file mode 100644 index 000000000..3a237ae95 --- /dev/null +++ b/tests/chat/goldens/TheBloke-FusionNet_34Bx2_MoE-AWQ-tool_use.txt @@ -0,0 +1,49 @@ +Print a hello world message with python. [/INST] { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +} <|endoftext|><|startoftext|>[INST] { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} [/INST] Anything else? <|endoftext|><|startoftext|>[INST] Test a tautology. [/INST] { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +} <|endoftext|><|startoftext|>[INST] { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} [/INST] Truth is definitely true. <|endoftext|><|startoftext|>[INST] Check it on the web. [/INST] { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +} <|endoftext|><|startoftext|>[INST] { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} [/INST] I don't need the web to answer you but I did check, as you asked. What now? 
<|endoftext|> \ No newline at end of file diff --git a/tests/chat/goldens/abacusai-Fewshot-Metamath-OrcaVicuna-Mistral-tool_use.txt b/tests/chat/goldens/abacusai-Fewshot-Metamath-OrcaVicuna-Mistral-tool_use.txt new file mode 100644 index 000000000..eebefb8be --- /dev/null +++ b/tests/chat/goldens/abacusai-Fewshot-Metamath-OrcaVicuna-Mistral-tool_use.txt @@ -0,0 +1,49 @@ +<|startoftext|> Question: Print a hello world message with python. Answer: { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|endoftext|> Question: { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} Answer: Anything else?<|endoftext|> Question: Test a tautology. Answer: { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|endoftext|> Question: { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} Answer: Truth is definitely true.<|endoftext|> Question: Check it on the web. Answer: { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|endoftext|> Question: { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} Answer: I don't need the web to answer you but I did check, as you asked. What now?<|endoftext|> Answer: \ No newline at end of file diff --git a/tests/chat/goldens/bofenghuang-vigogne-2-70b-chat-tool_use.txt b/tests/chat/goldens/bofenghuang-vigogne-2-70b-chat-tool_use.txt new file mode 100644 index 000000000..a67a1c630 --- /dev/null +++ b/tests/chat/goldens/bofenghuang-vigogne-2-70b-chat-tool_use.txt @@ -0,0 +1,53 @@ +<|startoftext|>[INST] <> +Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez. +<> + +Print a hello world message with python. [/INST] { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +} <|endoftext|>[INST] { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} [/INST] Anything else? <|endoftext|>[INST] Test a tautology. [/INST] { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +} <|endoftext|>[INST] { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} [/INST] Truth is definitely true. <|endoftext|>[INST] Check it on the web. [/INST] { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +} <|endoftext|>[INST] { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} [/INST] I don't need the web to answer you but I did check, as you asked. What now? 
<|endoftext|> \ No newline at end of file diff --git a/tests/chat/goldens/deepseek-ai-DeepSeek-Coder-V2-Instruct-tool_use.txt b/tests/chat/goldens/deepseek-ai-DeepSeek-Coder-V2-Instruct-tool_use.txt new file mode 100644 index 000000000..c96678e27 --- /dev/null +++ b/tests/chat/goldens/deepseek-ai-DeepSeek-Coder-V2-Instruct-tool_use.txt @@ -0,0 +1,61 @@ +<|startoftext|>User: Print a hello world message with python. + +Assistant: { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|endoftext|>User: { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} + +Assistant: Anything else?<|endoftext|>User: Test a tautology. + +Assistant: { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|endoftext|>User: { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} + +Assistant: Truth is definitely true.<|endoftext|>User: Check it on the web. + +Assistant: { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|endoftext|>User: { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} + +Assistant: I don't need the web to answer you but I did check, as you asked. What now?<|endoftext|>Assistant: \ No newline at end of file diff --git a/tests/chat/goldens/deepseek-ai-DeepSeek-V2.5-tool_use.txt b/tests/chat/goldens/deepseek-ai-DeepSeek-V2.5-tool_use.txt new file mode 100644 index 000000000..0043cd651 --- /dev/null +++ b/tests/chat/goldens/deepseek-ai-DeepSeek-V2.5-tool_use.txt @@ -0,0 +1,49 @@ +<|startoftext|><|User|>Print a hello world message with python.<|Assistant|>{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end▁of▁sentence|><|User|>{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|Assistant|>Anything else?<|end▁of▁sentence|><|User|>Test a tautology.<|Assistant|>{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end▁of▁sentence|><|User|>{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|Assistant|>Truth is definitely true.<|end▁of▁sentence|><|User|>Check it on the web.<|Assistant|>{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|end▁of▁sentence|><|User|>{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|Assistant|>I don't need the web to answer you but I did check, as you asked. 
What now?<|end▁of▁sentence|><|Assistant|> \ No newline at end of file diff --git a/tests/chat/goldens/deepseek-ai-deepseek-coder-33b-instruct-tool_use.txt b/tests/chat/goldens/deepseek-ai-deepseek-coder-33b-instruct-tool_use.txt new file mode 100644 index 000000000..5a79e4f08 --- /dev/null +++ b/tests/chat/goldens/deepseek-ai-deepseek-coder-33b-instruct-tool_use.txt @@ -0,0 +1,80 @@ +<|startoftext|>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer +### Instruction: +Print a hello world message with python. +### Response: +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +} +<|EOT|> +### Instruction: +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} +### Response: +Anything else? +<|EOT|> +### Instruction: +Test a tautology. +### Response: +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +} +<|EOT|> +### Instruction: +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} +### Response: +Truth is definitely true. +<|EOT|> +### Instruction: +Check it on the web. +### Response: +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +} +<|EOT|> +### Instruction: +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} +### Response: +I don't need the web to answer you but I did check, as you asked. What now? +<|EOT|> +### Response: diff --git a/tests/chat/goldens/google-gemma-2-2b-it-system.txt b/tests/chat/goldens/google-gemma-2-2b-it-system.txt new file mode 100644 index 000000000..c5dc27810 --- /dev/null +++ b/tests/chat/goldens/google-gemma-2-2b-it-system.txt @@ -0,0 +1,6 @@ +<|startoftext|>user +You only tell the truth. +What's your favourite LLM framework? +model +llama.cpp! +model diff --git a/tests/chat/goldens/google-gemma-2-2b-it-tool_use.txt b/tests/chat/goldens/google-gemma-2-2b-it-tool_use.txt new file mode 100644 index 000000000..a7f17f9a4 --- /dev/null +++ b/tests/chat/goldens/google-gemma-2-2b-it-tool_use.txt @@ -0,0 +1,73 @@ +<|startoftext|>user +Print a hello world message with python. +model +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +} +user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} +model +Anything else? +user +Test a tautology. +model +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +} +user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} +model +Truth is definitely true. +user +Check it on the web. +model +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" 
+ }, + "id": "call_3___" + } + ] +} +user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} +model +I don't need the web to answer you but I did check, as you asked. What now? +model diff --git a/tests/chat/goldens/google-gemma-7b-it-system.txt b/tests/chat/goldens/google-gemma-7b-it-system.txt new file mode 100644 index 000000000..c5dc27810 --- /dev/null +++ b/tests/chat/goldens/google-gemma-7b-it-system.txt @@ -0,0 +1,6 @@ +<|startoftext|>user +You only tell the truth. +What's your favourite LLM framework? +model +llama.cpp! +model diff --git a/tests/chat/goldens/google-gemma-7b-it-tool_use.txt b/tests/chat/goldens/google-gemma-7b-it-tool_use.txt new file mode 100644 index 000000000..a7f17f9a4 --- /dev/null +++ b/tests/chat/goldens/google-gemma-7b-it-tool_use.txt @@ -0,0 +1,73 @@ +<|startoftext|>user +Print a hello world message with python. +model +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +} +user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} +model +Anything else? +user +Test a tautology. +model +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +} +user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} +model +Truth is definitely true. +user +Check it on the web. +model +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +} +user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} +model +I don't need the web to answer you but I did check, as you asked. What now? +model diff --git a/tests/chat/goldens/indischepartij-MiniCPM-3B-OpenHermes-2.5-v2-tool_use.txt b/tests/chat/goldens/indischepartij-MiniCPM-3B-OpenHermes-2.5-v2-tool_use.txt new file mode 100644 index 000000000..fc174564d --- /dev/null +++ b/tests/chat/goldens/indischepartij-MiniCPM-3B-OpenHermes-2.5-v2-tool_use.txt @@ -0,0 +1,49 @@ +<用户>Print a hello world message with python.{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<用户>{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}Anything else?<用户>Test a tautology.{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<用户>{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}Truth is definitely true.<用户>Check it on the web.{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<用户>{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}I don't need the web to answer you but I did check, as you asked. What now? 
\ No newline at end of file diff --git a/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-system.txt b/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-system.txt index 3f0e5ca78..c7f810da9 100644 --- a/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-system.txt +++ b/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-system.txt @@ -1,4 +1,5 @@ <|user|> +You only tell the truth. What's your favourite LLM framework?<|end|> <|assistant|> llama.cpp!<|end|> diff --git a/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-tool_use.txt b/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-tool_use.txt new file mode 100644 index 000000000..8d1403d6d --- /dev/null +++ b/tests/chat/goldens/microsoft-Phi-3-medium-4k-instruct-tool_use.txt @@ -0,0 +1,72 @@ +<|user|> +Print a hello world message with python.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|end|> +<|assistant|> +Anything else?<|end|> +<|user|> +Test a tautology.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|end|> +<|assistant|> +Truth is definitely true.<|end|> +<|user|> +Check it on the web.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|end|> +<|assistant|> +I don't need the web to answer you but I did check, as you asked. What now?<|end|> diff --git a/tests/chat/goldens/microsoft-Phi-3-mini-4k-instruct-tool_use.txt b/tests/chat/goldens/microsoft-Phi-3-mini-4k-instruct-tool_use.txt new file mode 100644 index 000000000..3b9a0f82a --- /dev/null +++ b/tests/chat/goldens/microsoft-Phi-3-mini-4k-instruct-tool_use.txt @@ -0,0 +1,73 @@ +<|user|> +Print a hello world message with python.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|end|> +<|assistant|> +Anything else?<|end|> +<|user|> +Test a tautology.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|end|> +<|assistant|> +Truth is definitely true.<|end|> +<|user|> +Check it on the web.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" 
+ }, + "id": "call_3___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|end|> +<|assistant|> +I don't need the web to answer you but I did check, as you asked. What now?<|end|> +<|assistant|> diff --git a/tests/chat/goldens/microsoft-Phi-3-small-8k-instruct-tool_use.txt b/tests/chat/goldens/microsoft-Phi-3-small-8k-instruct-tool_use.txt new file mode 100644 index 000000000..0cfa955cb --- /dev/null +++ b/tests/chat/goldens/microsoft-Phi-3-small-8k-instruct-tool_use.txt @@ -0,0 +1,73 @@ +<|startoftext|><|user|> +Print a hello world message with python.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|end|> +<|assistant|> +Anything else?<|end|> +<|user|> +Test a tautology.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|end|> +<|assistant|> +Truth is definitely true.<|end|> +<|user|> +Check it on the web.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|end|> +<|assistant|> +I don't need the web to answer you but I did check, as you asked. What now?<|end|> +<|assistant|> diff --git a/tests/chat/goldens/microsoft-Phi-3.5-mini-instruct-tool_use.txt b/tests/chat/goldens/microsoft-Phi-3.5-mini-instruct-tool_use.txt new file mode 100644 index 000000000..3b9a0f82a --- /dev/null +++ b/tests/chat/goldens/microsoft-Phi-3.5-mini-instruct-tool_use.txt @@ -0,0 +1,73 @@ +<|user|> +Print a hello world message with python.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|end|> +<|assistant|> +Anything else?<|end|> +<|user|> +Test a tautology.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|end|> +<|assistant|> +Truth is definitely true.<|end|> +<|user|> +Check it on the web.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|end|> +<|assistant|> +I don't need the web to answer you but I did check, as you asked. 
What now?<|end|> +<|assistant|> diff --git a/tests/chat/goldens/microsoft-Phi-3.5-vision-instruct-tool_use.txt b/tests/chat/goldens/microsoft-Phi-3.5-vision-instruct-tool_use.txt new file mode 100644 index 000000000..8d1403d6d --- /dev/null +++ b/tests/chat/goldens/microsoft-Phi-3.5-vision-instruct-tool_use.txt @@ -0,0 +1,72 @@ +<|user|> +Print a hello world message with python.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|end|> +<|assistant|> +Anything else?<|end|> +<|user|> +Test a tautology.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|end|> +<|assistant|> +Truth is definitely true.<|end|> +<|user|> +Check it on the web.<|end|> +<|assistant|> +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|end|> +<|user|> +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|end|> +<|assistant|> +I don't need the web to answer you but I did check, as you asked. What now?<|end|> diff --git a/tests/chat/goldens/mistralai-Mistral-7B-Instruct-v0.2-tool_use.txt b/tests/chat/goldens/mistralai-Mistral-7B-Instruct-v0.2-tool_use.txt new file mode 100644 index 000000000..8451e06c7 --- /dev/null +++ b/tests/chat/goldens/mistralai-Mistral-7B-Instruct-v0.2-tool_use.txt @@ -0,0 +1,49 @@ +<|startoftext|> [INST] Print a hello world message with python. [/INST] { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|endoftext|> [INST] { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} [/INST] Anything else?<|endoftext|> [INST] Test a tautology. [/INST] { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|endoftext|> [INST] { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} [/INST] Truth is definitely true.<|endoftext|> [INST] Check it on the web. [/INST] { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|endoftext|> [INST] { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} [/INST] I don't need the web to answer you but I did check, as you asked. What now?<|endoftext|> \ No newline at end of file diff --git a/tests/chat/goldens/mistralai-Mixtral-8x7B-Instruct-v0.1-tool_use.txt b/tests/chat/goldens/mistralai-Mixtral-8x7B-Instruct-v0.1-tool_use.txt new file mode 100644 index 000000000..8451e06c7 --- /dev/null +++ b/tests/chat/goldens/mistralai-Mixtral-8x7B-Instruct-v0.1-tool_use.txt @@ -0,0 +1,49 @@ +<|startoftext|> [INST] Print a hello world message with python. 
[/INST] { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|endoftext|> [INST] { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +} [/INST] Anything else?<|endoftext|> [INST] Test a tautology. [/INST] { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|endoftext|> [INST] { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +} [/INST] Truth is definitely true.<|endoftext|> [INST] Check it on the web. [/INST] { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|endoftext|> [INST] { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +} [/INST] I don't need the web to answer you but I did check, as you asked. What now?<|endoftext|> \ No newline at end of file diff --git a/tests/chat/goldens/mlabonne-AlphaMonarch-7B-tool_use.txt b/tests/chat/goldens/mlabonne-AlphaMonarch-7B-tool_use.txt new file mode 100644 index 000000000..d0539867e --- /dev/null +++ b/tests/chat/goldens/mlabonne-AlphaMonarch-7B-tool_use.txt @@ -0,0 +1,73 @@ +<|startoftext|>user +Print a hello world message with python.<|endoftext|> +<|startoftext|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|endoftext|> +<|startoftext|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|endoftext|> +<|startoftext|>assistant +Anything else?<|endoftext|> +<|startoftext|>user +Test a tautology.<|endoftext|> +<|startoftext|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|endoftext|> +<|startoftext|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|endoftext|> +<|startoftext|>assistant +Truth is definitely true.<|endoftext|> +<|startoftext|>user +Check it on the web.<|endoftext|> +<|startoftext|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|endoftext|> +<|startoftext|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|endoftext|> +<|startoftext|>assistant +I don't need the web to answer you but I did check, as you asked. 
What now?<|endoftext|> +<|startoftext|>assistant diff --git a/tests/chat/goldens/openchat-openchat-3.5-0106-tool_use.txt b/tests/chat/goldens/openchat-openchat-3.5-0106-tool_use.txt new file mode 100644 index 000000000..5f119d7e1 --- /dev/null +++ b/tests/chat/goldens/openchat-openchat-3.5-0106-tool_use.txt @@ -0,0 +1,49 @@ +<|startoftext|>GPT4 Correct User: Print a hello world message with python.<|end_of_turn|>GPT4 Correct Assistant: { + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|end_of_turn|>GPT4 Correct User: { + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|end_of_turn|>GPT4 Correct Assistant: Anything else?<|end_of_turn|>GPT4 Correct User: Test a tautology.<|end_of_turn|>GPT4 Correct Assistant: { + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|end_of_turn|>GPT4 Correct User: { + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|end_of_turn|>GPT4 Correct Assistant: Truth is definitely true.<|end_of_turn|>GPT4 Correct User: Check it on the web.<|end_of_turn|>GPT4 Correct Assistant: { + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|end_of_turn|>GPT4 Correct User: { + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|end_of_turn|>GPT4 Correct Assistant: I don't need the web to answer you but I did check, as you asked. What now?<|end_of_turn|>GPT4 Correct Assistant: \ No newline at end of file diff --git a/tests/chat/goldens/teknium-OpenHermes-2.5-Mistral-7B-tool_use.txt b/tests/chat/goldens/teknium-OpenHermes-2.5-Mistral-7B-tool_use.txt new file mode 100644 index 000000000..64b027b4f --- /dev/null +++ b/tests/chat/goldens/teknium-OpenHermes-2.5-Mistral-7B-tool_use.txt @@ -0,0 +1,73 @@ +<|im_start|>user +Print a hello world message with python.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "ipython", + "arguments": { + "code": "print('Hello, World!')" + }, + "id": "call_1___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "ipython", + "content": "{\"stdout\": \"Hello, World!\"}", + "tool_call_id": "call_1___" + } +}<|im_end|> +<|im_start|>assistant +Anything else?<|im_end|> +<|im_start|>user +Test a tautology.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "test", + "arguments": { + "condition": true + }, + "id": "call_2___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "test", + "content": "true", + "tool_call_id": "call_2___" + } +}<|im_end|> +<|im_start|>assistant +Truth is definitely true.<|im_end|> +<|im_start|>user +Check it on the web.<|im_end|> +<|im_start|>assistant +{ + "tool_calls": [ + { + "name": "brave_search", + "arguments": { + "query": "what is truth anyway am I right?" + }, + "id": "call_3___" + } + ] +}<|im_end|> +<|im_start|>user +{ + "tool_response": { + "tool": "brave_search", + "content": "{\"title\":\"Truth: don't ask the web, ask an LLM instead!\",\"url\":\"https://en.wikipedia.org/wiki/Truth\"}", + "tool_call_id": "call_3___" + } +}<|im_end|> +<|im_start|>assistant +I don't need the web to answer you but I did check, as you asked. 
What now?<|im_end|>
+<|im_start|>assistant
diff --git a/tests/test-tool-call.cpp b/tests/test-tool-call.cpp
index 133a89819..a39b1d65f 100644
--- a/tests/test-tool-call.cpp
+++ b/tests/test-tool-call.cpp
@@ -118,7 +118,7 @@ const json tools = json::parse(R"([
   {
     "type": "function",
     "function": {
-      "name": "ipython",
+      "name": "python",
       "description": "a python interpreter",
       "parameters": {
         "type": "object",
@@ -164,12 +164,12 @@ static void test_parsing() {
         json::array({fooBarCall}));
 
     test_parse_tool_call(llama_tool_call_style::FunctionaryV3Llama3, tools,
-        ">>>ipython\n{\"code\": \"print('Hello, world!')\"}",
+        ">>>python\n{\"code\": \"print('Hello, world!')\"}",
         "",
         json {{
             {"type", "function"},
             {"function", {
-                {"name", "ipython"},
+                {"name", "python"},
                 {"arguments", dump({
                     {"code", "print('Hello, world!')"}
                 })}
@@ -228,7 +228,7 @@ static void test_parsing() {
        json {{
            {"type", "function"},
            {"function", {
-                {"name", "ipython"},
+                {"name", "python"},
                {"arguments", dump({
                    {"code", "this could be anything"}
                })}
@@ -240,7 +240,7 @@ static void test_parsing() {
        json {{
            {"type", "function"},
            {"function", {
-                {"name", "ipython"},
+                {"name", "python"},
                {"arguments", dump({{"code", ""}})}
            }}
        }});
@@ -256,6 +256,16 @@ static void test_parsing() {
 
     auto no_function_call = json::array();
 
+    test_parse_tool_call(llama_tool_call_style::Llama31, tools,
+        "{\"name\": \"python\", \"parameters\": {\"code\": \"print('Hey')\"}}",
+        "",
+        json::array({{
+            {"type", "function"},
+            {"function", {
+                {"arguments", dump({{"code", "print('Hey')"}})},
+                {"name", "python"},
+            }}
+        }}));
     test_parse_tool_call(llama_tool_call_style::Llama31, tools,
         "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
         "",
@@ -404,6 +414,8 @@ static void test_grammars() {
     test_template("tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", "", "", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
     test_template("tests/chat/templates/meetkai-functionary-medium-v3.1.jinja", "", "", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
     test_template("tests/chat/templates/meetkai-functionary-medium-v3.2.jinja", "", "", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
+    test_template("tests/chat/templates/google-gemma-2-2b-it.jinja", "", "", { "<end_of_turn>" }, tool_call_message_with_id, tools);
+    test_template("tests/chat/templates/microsoft-Phi-3.5-mini-instruct.jinja", "", "", { "<|end|>" }, tool_call_message_with_id, tools);
 }
 
 int main() {
@@ -411,6 +423,6 @@
     test_parsing();
     test_grammars();
 
-    std::cout << "[tool-call] All tests passed!" << std::endl;
+    std::cout << "\n[tool-call] All tests passed!" << std::endl;
     return 0;
 }
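
Note on the goldens above: they all encode the same fallback convention for templates with no native tool-call syntax. Assistant tool calls are re-serialized into the message content as a "tool_calls" JSON array, and tool results are folded back in as user turns wrapping a "tool_response" object. For reference, a minimal Python sketch of that convention as the fixtures exercise it; this mirrors the rendered goldens only, not the patch's C++ implementation, and the helper name normalize_messages is invented here:

    import json

    def normalize_messages(messages):
        """Rewrite tool calls/results as plain JSON content, for chat
        templates that have no native tool-call syntax."""
        out = []
        for msg in messages:
            msg = dict(msg)
            if msg.get("tool_calls"):
                calls = []
                for tc in msg["tool_calls"]:
                    if tc.get("type") != "function":
                        continue
                    call = {
                        "name": tc["function"]["name"],
                        # OpenAI-style requests carry arguments as a JSON
                        # string; parse them so the rendered output shows a
                        # real object, as in the goldens.
                        "arguments": json.loads(tc["function"]["arguments"]),
                    }
                    if "id" in tc:
                        call["id"] = tc["id"]
                    calls.append(call)
                msg["content"] = json.dumps({"tool_calls": calls}, indent=2, ensure_ascii=False)
                del msg["tool_calls"]
            if msg.get("role") == "tool":
                # Templates without a tool role receive the result as a user turn.
                msg = {
                    "role": "user",
                    "content": json.dumps({"tool_response": {
                        "tool": msg.get("name"),
                        "content": msg.get("content"),
                        "tool_call_id": msg.get("tool_call_id"),
                    }}, indent=2, ensure_ascii=False),
                }
            out.append(msg)
        return out

Feeding the three-call conversation used by these tests through such a helper and then through any plain chat template reproduces the alternating user/assistant JSON turns seen in each golden.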