grammars: 1.5x faster inference w/ complex grammars (vector reserves / reuses) (#6609)
* grammars: reserve rejects & next candidates * grammars: reuse new_stacks * grammars: fix missing sig change in llama.h * grammars: fix test (api changed) * grammars: update gbnf-validator.cpp * grammars: simpler syntax (no swap)
This commit is contained in:
parent
1bbdaf6ecd
commit
cbaadc9294
4 changed files with 17 additions and 12 deletions
5
llama.h
5
llama.h
|
@ -1097,10 +1097,11 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
|
|||
struct llama_context * ctx
|
||||
);
|
||||
|
||||
std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
|
||||
void llama_grammar_accept(
|
||||
const std::vector<std::vector<llama_grammar_element>> & rules,
|
||||
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
|
||||
const uint32_t chr);
|
||||
const uint32_t chr,
|
||||
std::vector<std::vector<const llama_grammar_element *>> & new_stacks);
|
||||
|
||||
std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
|
||||
const std::string & src,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue