grammars: 1.5x faster inference w/ complex grammars (vector reserves / reuses) (#6609)

* grammars: reserve rejects & next candidates

* grammars: reuse new_stacks

* grammars: fix missing sig change in llama.h

* grammars: fix test (api changed)

* grammars: update gbnf-validator.cpp

* grammars: simpler syntax (no swap)
This commit is contained in:
Olivier Chafik 2024-04-11 19:47:34 +01:00 committed by GitHub
parent 1bbdaf6ecd
commit cbaadc9294
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 17 additions and 12 deletions

View file

@@ -1097,10 +1097,11 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
 struct llama_context * ctx
 );
-std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+void llama_grammar_accept(
 const std::vector<std::vector<llama_grammar_element>> & rules,
 const std::vector<std::vector<const llama_grammar_element *>> & stacks,
-const uint32_t chr);
+const uint32_t chr,
+std::vector<std::vector<const llama_grammar_element *>> & new_stacks);
 std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
 const std::string & src,