grammars: 1.5x faster inference w/ complex grammars (vector reserves / reuses) (#6609)

* grammars: reserve rejects & next candidates

* grammars: reuse new_stacks

* grammars: fix missing sig change in llama.h

* grammars: fix test (api changed)

* grammars: update gbnf-validator.cpp

* grammars: simpler syntax (no swap)
This commit is contained in:
Olivier Chafik 2024-04-11 19:47:34 +01:00 committed by GitHub
parent 1bbdaf6ecd
commit cbaadc9294
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 17 additions and 12 deletions

View file

@ -17,7 +17,7 @@ static bool llama_sample_grammar_string(struct llama_grammar * grammar, const st
size_t pos = 0;
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
auto prev_stacks = grammar->stacks;
grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
if (grammar->stacks.empty()) {
error_pos = pos;
error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'";