diff --git a/common/tool-call.cpp b/common/tool-call.cpp
index adff1b2f8..b209c9145 100644
--- a/common/tool-call.cpp
+++ b/common/tool-call.cpp
@@ -67,6 +67,8 @@ std::string llama_tool_call_style_name(llama_tool_call_style style) {
             return "CommandRPlus";
         case llama_tool_call_style::MistralNemo:
             return "MistralNemo";
+        case llama_tool_call_style::FirefunctionV2:
+            return "FirefunctionV2";
         default:
             return "Unknown";
     }
@@ -92,6 +94,8 @@ llama_tool_call_style llama_tool_call_style_detect(const minja::chat_template &
         return CommandRPlus;
     } else if (src.find("[TOOL_CALLS]") != std::string::npos) {
         return MistralNemo;
+    } else if (src.find(" functools[") != std::string::npos) {
+        return FirefunctionV2;
     } else {
         return Generic;
     }
@@ -315,8 +319,8 @@ static llama_tool_calls parse_generic_tool_calls(const std::string& input) {
     return result;
 }
 
-static llama_tool_calls parse_mistral_nemo_tool_calls(const std::string& input) {
-    auto content_end = input.find("[TOOL_CALLS]");
+static llama_tool_calls parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
+    auto content_end = input.find(prefix);
     size_t tc_start = std::string::npos;
 
     llama_tool_calls result;
@@ -330,25 +334,27 @@ static llama_tool_calls parse_mistral_nemo_tool_calls(const std::string& input)
             });
         }
     };
-    if (content_end != std::string::npos) {
-        tc_start = content_end + 12;
+    if (content_end == std::string::npos) {
+        result.content = input;
+    } else {
+        tc_start = content_end + prefix.size() - rstrip_prefix;
         result.content = input.substr(0, content_end);
         auto tool_calls = json::parse(input.substr(tc_start));
         process_tool_calls(tool_calls);
-    } else {
-        // Somehow not getting [TOOL_CALLS] in the output. Oh well, just do without it.
-        try {
-            auto tool_calls = json::parse(input);
-            process_tool_calls(tool_calls);
-        } catch (const json::exception & e) {
-            throw std::runtime_error("Failed to parse tool calls: " + std::string(e.what()) + ":\n" + input);
-        }
     }
     return result;
 }
 
+static llama_tool_calls parse_mistral_nemo_tool_calls(const std::string& input) {
+    return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]");
+}
+
+static llama_tool_calls parse_firefunction_v2_tool_calls(const std::string& input) {
+    return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1);
+}
+
 llama_tool_calls parse_tool_calls(llama_tool_call_style style, const json & tools, const std::string& input) {
-    // fprintf(stderr, "# parse_tool_calls(%s):\n\n%s\n\n", llama_tool_call_style_name(style).c_str(), input.c_str());
+    fprintf(stderr, "# parse_tool_calls(%s):\n\n%s\n\n", llama_tool_call_style_name(style).c_str(), input.c_str());
     switch (style) {
         case llama_tool_call_style::None:
             return {input, {}};
@@ -366,6 +372,8 @@ llama_tool_calls parse_tool_calls(llama_tool_call_style style, const json & tool
             return parse_hermes_tool_calls(input);
         case llama_tool_call_style::MistralNemo:
             return parse_mistral_nemo_tool_calls(input);
+        case llama_tool_call_style::FirefunctionV2:
+            return parse_firefunction_v2_tool_calls(input);
         default:
             throw std::runtime_error("Unsupported tool call style");
     }
@@ -406,16 +414,14 @@ llama_tool_call_handler llama_tool_call_handler_init(
             auto tool_call_schemas = json::array();
             for (const auto & tool : actual_tools) {
                 const auto & function = tool["function"];
-                std::string name = function["name"];
-                auto parameters = function["parameters"];
                 auto tool_schema = json {
                     {"type", "object"},
                     {"properties", {
                         {"name", {
                             {"type", "string"},
-                            {"const", name},
+                            {"const", function["name"]},
                         }},
-                        {"arguments", parameters},
+                        {"arguments", function["parameters"]},
                     }},
                     {"required", json::array({"name", "arguments"})},
                 };
@@ -483,18 +489,16 @@ llama_tool_call_handler llama_tool_call_handler_init(
             auto schemas = json::array();
             for (const auto & tool : actual_tools) {
                 const auto & function = tool["function"];
-                std::string name = function["name"];
-                auto parameters = function["parameters"];
-                auto schema = json {
+                schemas.push_back({
                     {"type", "object"},
                     {"properties", {
                         // Important note: the model is probably trained to take a JSON stringified arguments value.
                         // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
-                        {"arguments", parameters},
                         {"name", {
                             {"type", "string"},
-                            {"const", name},
+                            {"const", function["name"]},
                         }},
+                        {"arguments", function["parameters"]},
                         {"id", {
                             {"type", "string"},
                             // Nemo's template expects a 9-character alphanumeric ID.
@@ -502,8 +506,7 @@ llama_tool_call_handler llama_tool_call_handler_init(
                         }},
                     }},
                     {"required", json::array({"name", "arguments", "id"})},
-                };
-                schemas.push_back(schema);
+                });
             }
             auto schema = json {
                 {"type", "array"},
@@ -517,9 +520,41 @@ llama_tool_call_handler llama_tool_call_handler_init(
             });
             if (allow_content) {
                 handler.grammar_trigger_words.push_back("[TOOL_CALLS]");
-                handler.grammar_trigger_words.push_back("[{\"arguments\":");
             }
-            // auto tweaked_messages = add_system(messages, "You are a helpful AI with tool calling capabilities. Prefix any tool calls with [TOOL_CALLS]");
+            handler.prompt = tmpl.apply(messages, actual_tools.empty() ? json() : actual_tools, /* add_generation_prompt= */ true);
+            break;
+        }
+        case llama_tool_call_style::FirefunctionV2: {
+            auto actual_tools = normalize_tools(tools);
+            handler.grammar = build_grammar([&](const llama_grammar_builder & builder) {
+                auto schemas = json::array();
+                for (const auto & tool : actual_tools) {
+                    const auto & function = tool["function"];
+                    schemas.push_back({
+                        {"type", "object"},
+                        {"properties", {
+                            {"name", {
+                                {"type", "string"},
+                                {"const", function["name"]},
+                            }},
+                            {"arguments", function["parameters"]},
+                        }},
+                        {"required", json::array({"name", "arguments", "id"})},
+                    });
+                }
+                auto schema = json {
+                    {"type", "array"},
+                    {"items", json {{"anyOf", schemas}}},
+                    {"minItems", 1},
+                };
+                if (!parallel) {
+                    schema["maxItems"] = 1;
+                }
+                builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
+            });
+            if (allow_content) {
+                handler.grammar_trigger_words.push_back(" functools[");
+            }
             handler.prompt = tmpl.apply(messages, actual_tools.empty() ? json() : actual_tools, /* add_generation_prompt= */ true);
             break;
         }
diff --git a/common/tool-call.h b/common/tool-call.h
index 6d1265460..c2d068441 100644
--- a/common/tool-call.h
+++ b/common/tool-call.h
@@ -18,6 +18,7 @@ enum llama_tool_call_style {
     Hermes2Pro,
     CommandRPlus,
     MistralNemo,
+    FirefunctionV2,
 };
 
 struct llama_tool_call {
diff --git a/examples/agent/README.md b/examples/agent/README.md
index 830c6493c..4770720c6 100644
--- a/examples/agent/README.md
+++ b/examples/agent/README.md
@@ -1,10 +1,11 @@
 # Agents / Tool Calling w/ llama.cpp
 
 While *any model* should work (using some generic support), we only support the native call style of a few models:
-- Llama 3.x
+- Firefunction v2
+- Mistral Nemo
 - Functionary 3.x
-- Hermes 2/3, Qwen 2.5
-- Mistral Nemo.
+- Llama 3.x
+- Hermes 2/3 / Qwen 2.5 / QwQ
 
 For natively supported models, it's important to have the right template (it might not be in the GGUF; note that we prefer the `tool_use` variant of the Jinja template if it's present in the GGUF metadata). You can check which template is defined by inspecting `http://localhost:8080/props`, and inspect the logs for `Tool call style: `.
 
@@ -23,31 +24,35 @@ Here's how to run an agent w/ local tool call:
 # and consume more tokens)
 
 ./build/bin/llama-server --jinja -fa --verbose \
-    -hfr bartowski/Qwen2.5-7B-Instruct-GGUF -hff Qwen2.5-7B-Instruct-Q4_K_M.gguf
-
-./build/bin/llama-server --jinja -fa --verbose \
-    -hfr NousResearch/Hermes-3-Llama-3.1-8B-GGUF -hff Hermes-3-Llama-3.1-8B.Q4_K_M.gguf \
-    --chat-template-file tests/chat/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
-
-./build/bin/llama-server --jinja -fa --verbose \
-    -hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q8_0.gguf \
-    --chat-template-file tests/chat/templates/meetkai-functionary-medium-v3.2.jinja
-
-./build/bin/llama-server --jinja -fa --verbose \
-    -hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K.gguf \
-    --chat-template-file tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja
+    -hfr mav23/llama-3-firefunction-v2-GGUF -hff llama-3-firefunction-v2.Q4_K_M.gguf \
+    --chat-template-file <( python scripts/get_hf_chat_template.py fireworks-ai/firellama-3-firefunction-v2 )
 
 # Note the --special flag: this is needed b/c of a regression from the last merge, will fix!
-./build/bin/llama-server --jinja -fa --verbose --special \
+./llama-server --jinja -fa --special \
     -hfr bartowski/Mistral-Nemo-Instruct-2407-GGUF -hff Mistral-Nemo-Instruct-2407-Q8_0.gguf \
-    --chat-template-file tests/chat/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja
+    --chat-template-file <( python scripts/get_hf_chat_template.py mistralai/Mistral-Nemo-Instruct-2407 )
+
+./llama-server --jinja -fa \
+    -hfr NousResearch/Hermes-3-Llama-3.1-8B-GGUF -hff Hermes-3-Llama-3.1-8B.Q4_K_M.gguf \
+    --chat-template-file <( python scripts/get_hf_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use )
+
+./llama-server --jinja -fa \
+    -hfr meetkai/functionary-small-v3.2-GGUF -hff functionary-small-v3.2.Q8_0.gguf \
+    --chat-template-file <( python scripts/get_hf_chat_template.py meetkai/functionary-medium-v3.2 )
+
+./llama-server --jinja -fa \
+    -hfr bartowski/Qwen2.5-7B-Instruct-GGUF -hff Qwen2.5-7B-Instruct-Q4_K_M.gguf
+
+./llama-server --jinja -fa \
+    -hfr lmstudio-community/Llama-3.2-3B-Instruct-GGUF -hff Llama-3.2-3B-Instruct-Q6_K.gguf \
+    --chat-template-file <( python scripts/get_hf_chat_template.py meta-llama/Llama-3.2-3B-Instruct )
 
 # Generic support, e.g. Phi 3.5, Gemma 2b, but really anything goes
-./build/bin/llama-server --jinja -fa --verbose \
+./llama-server --jinja -fa \
     -hfr bartowski/Phi-3.5-mini-instruct-GGUF -hff Phi-3.5-mini-instruct-Q4_K_M.gguf
 
-./build/bin/llama-server --jinja -fa --verbose \
+./llama-server --jinja -fa \
     -hfr bartowski/gemma-2-2b-it-GGUF -hff gemma-2-2b-it-Q4_K_M.gguf
 ```
diff --git a/tests/chat/templates/fireworks-ai-llama-3-firefunction-v2.jinja b/tests/chat/templates/fireworks-ai-llama-3-firefunction-v2.jinja
new file mode 100644
index 000000000..9b8136df7
--- /dev/null
+++ b/tests/chat/templates/fireworks-ai-llama-3-firefunction-v2.jinja
@@ -0,0 +1,57 @@
+{%- set loop_messages = messages -%}
+{%- set message_roles = ['system', 'user', 'assistant', 'tool'] -%}
+{%- set system_prompt_suffix -%}
+{%- filter trim -%}
+In addition to plain text responses, you can chose to call one or more of the provided functions.
+
+Use the following rule to decide when to call a function:
+ * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so
+ * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls
+
+If you decide to call functions:
+ * prefix function calls with functools marker (no closing marker required)
+ * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...]
+ * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples
+ * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0
+ * make sure you pick the right functions that match the user intent
+
+Available functions as JSON spec:
+{%- endfilter -%}
+{%- endset -%}
+{%- set system_prompt_suffix = system_prompt_suffix + "\n" + functions -%}
+{%- set system_prompt_suffix = system_prompt_suffix + '\nToday is ' + datetime + '.' -%}
+{%- set ns = namespace(role='', content='') -%}
+{#- Basic consistency checks -#}
+{%- if not loop_messages -%}
+    {{ raise_exception('Expected non-empty messages') }}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- set ns.role = message['role'] | lower -%}
+    {%- if ns.role not in message_roles -%}
+        {%- set message_roles_string = message_roles | join(', ') -%}
+        {{ raise_exception('Invalid role ' + message['role'] + '. Only ' + message_roles_string + ' are supported.') }}
+    {%- endif -%}
+    {%- set msg_content = message['content'] | default('', true) | trim -%}
+    {%- if loop.index0 == 0 -%}
+        {%- if ns.role == 'system' -%}
+            {%- set system_prompt = '<|start_header_id|>' + 'system' + '<|end_header_id|>\n\n' + message['content'] | trim + '\n' + system_prompt_suffix + '<|eot_id|>' -%}
+        {%- else -%}
+            {%- set system_prompt = '<|start_header_id|>' + 'system' + '<|end_header_id|>\n\nYou are a helpful assistant with access to functions.\n' + system_prompt_suffix + '<|eot_id|>' -%}
+        {%- endif -%}
+        {%- set ns.content = bos_token + system_prompt -%}
+        {{- ns.content -}}
+    {%- endif -%}
+    {%- if loop.index0 > 0 or ns.role != 'system' -%}
+        {%- set ns.content = '<|start_header_id|>' + ns.role + '<|end_header_id|>\n\n' + msg_content -%}
+        {%- if 'tool_calls' in message and message['tool_calls'] -%}
+            {%- set tool = namespace(calls=[]) -%}
+            {%- for call in message['tool_calls'] -%}
+                {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%}
+            {%- endfor -%}
+            {%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%}
+        {%- endif -%}
+        {%- set ns.content = ns.content + '<|eot_id|>' -%}
+        {{- ns.content -}}
+    {%- endif -%}
+{%- endfor -%}
+{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
diff --git a/tests/test-tool-call.cpp b/tests/test-tool-call.cpp
index c81a4c15a..d112e395e 100644
--- a/tests/test-tool-call.cpp
+++ b/tests/test-tool-call.cpp
@@ -306,10 +306,11 @@ static void test_parsing() {
         "Bleh[TOOL_CALLS][{\"arguments\": {\"arg1\": 1}, \"name\": \"special_function\", \"id\": \"123456789\"}]",
         "Bleh",
         json::array({special_function_call_with_id}));
-    test_parse_tool_call(llama_tool_call_style::MistralNemo, tools,
-        "[{\"arguments\": {\"arg1\": 1}, \"name\": \"special_function\", \"id\": \"123456789\"}]",
-        "",
-        json::array({special_function_call_with_id}));
+
+    test_parse_tool_call(llama_tool_call_style::FirefunctionV2, tools,
+        "Bleh functools[{\"arguments\": {\"arg1\": 1}, \"name\": \"special_function\"}]",
+        "Bleh",
+        json::array({special_function_call}));
 }
 
 static void test_tool_call_style(const std::string & template_file, llama_tool_call_style expected) {
@@ -322,6 +323,7 @@ static void test_tool_call_style(const std::string & template_file, llama_tool_c
 static void test_tool_call_style_detection() {
     test_tool_call_style("tests/chat/templates/meetkai-functionary-medium-v3.1.jinja", FunctionaryV3Llama31);
     test_tool_call_style("tests/chat/templates/meetkai-functionary-medium-v3.2.jinja", FunctionaryV3Llama3);
+    test_tool_call_style("tests/chat/templates/fireworks-ai-llama-3-firefunction-v2.jinja", FirefunctionV2);
     test_tool_call_style("tests/chat/templates/meta-llama-Meta-Llama-3.1-8B-Instruct.jinja", Llama31);
     test_tool_call_style("tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", Llama32);
     test_tool_call_style("tests/chat/templates/Qwen-Qwen2.5-7B-Instruct.jinja", Hermes2Pro);
@@ -414,6 +416,7 @@ static void test_grammars() {
     test_template("tests/chat/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", "", "", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
     test_template("tests/chat/templates/meetkai-functionary-medium-v3.1.jinja", "", "", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
     test_template("tests/chat/templates/meetkai-functionary-medium-v3.2.jinja", "", "", { "<|eom_id|>", "<|eot_id|>" }, tool_call_message, tools);
+    test_template("tests/chat/templates/fireworks-ai-llama-3-firefunction-v2.jinja", "", "", { "<|eot_id|>" }, tool_call_message, tools);
     test_template("tests/chat/templates/google-gemma-2-2b-it.jinja", "", "", { "<end_of_turn>" }, tool_call_message_with_id, tools);
     test_template("tests/chat/templates/microsoft-Phi-3.5-mini-instruct.jinja", "", "", { "<|end|>" }, tool_call_message_with_id, tools);
 }