From 05bbba9f8a0ebabcf7e7d573405e78c3511cc7c0 Mon Sep 17 00:00:00 2001
From: ochafik
Date: Sat, 28 Sep 2024 19:05:10 +0100
Subject: [PATCH] `tool-call`: only match json eagerly for Llama 3.2

---
 common/tool-call.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/tool-call.cpp b/common/tool-call.cpp
index 559c6653b..b0f4698e7 100644
--- a/common/tool-call.cpp
+++ b/common/tool-call.cpp
@@ -223,10 +223,10 @@ llama_tool_call_handler llama_tool_call_handler_init(
             auto uses_python_tag = tmpl.tool_call_style() == llama_tool_call_style::Llama31;
 
             // Technically we should only trigger on `"\n{\"name\": \"" + name + "\""` for each tool name,
-            // but Llama-3.2-3B struggles to output valid tool calls so we're "guiding" it strongly as soon
+            // but Llama-3.2-3B (and 1B) struggles to output valid tool calls so we're "guiding" it strongly as soon
             // as it seems to be outputting some JSON.
             // TODO: make this conditional on a very small model (e.g. 1B / 3B).
-            auto eagerly_match_any_json = true;
+            auto eagerly_match_any_json = tmpl.tool_call_style() == llama_tool_call_style::Llama32;
 
             handler.grammar = build_grammar([&](const llama_grammar_builder & builder) {
                 std::vector<std::string> tool_rules;