From 47e37dd955879d27d4037222bdf18cec123a900b Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 11 Apr 2024 15:11:40 +0100 Subject: [PATCH] grammars: reuse new_stacks --- llama.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llama.cpp b/llama.cpp index a11bf78dc..13a6807fc 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11912,12 +11912,13 @@ static void llama_grammar_advance_stack( // be positioned at a character range (see `llama_grammar_advance_stack`), and // produces the N possible stacks if the given char is accepted at those // positions -std::vector> llama_grammar_accept( +void llama_grammar_accept( const std::vector> & rules, const std::vector> & stacks, - const uint32_t chr) { + const uint32_t chr, + std::vector> & new_stacks) { - std::vector> new_stacks; + new_stacks.clear(); for (const auto & stack : stacks) { if (stack.empty()) { @@ -11936,8 +11937,6 @@ std::vector> llama_grammar_accept( llama_grammar_advance_stack(rules, new_stack, new_stacks); } } - - return new_stacks; } static std::vector llama_grammar_reject_candidates( @@ -12774,8 +12773,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar // Note terminating 0 in decoded string const auto decoded = decode_utf8(piece, grammar->partial_utf8); const auto & code_points = decoded.first; + std::vector> tmp_new_stacks; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { - grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); + llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks); + tmp_new_stacks.swap(grammar->stacks); } grammar->partial_utf8 = decoded.second; GGML_ASSERT(!grammar->stacks.empty());