llama : move vocab, grammar and sampling into separate files (#8508)

* llama : move sampling code into llama-sampling ggml-ci * llama : move grammar code into llama-grammar ggml-ci * cont ggml-ci * cont : pre-fetch rules * cont ggml-ci * llama : deprecate llama_sample_grammar * llama : move tokenizers into llama-vocab ggml-ci * make : update llama.cpp deps [no ci] * llama : redirect external API to internal APIs ggml-ci * llama : suffix the internal APIs with "_impl" ggml-ci * llama : clean-up
2024-07-23 13:10:17 +03:00 · 2024-07-23 13:10:17 +03:00 · 938943cdbf
commit 938943cdbf
parent 751fcfc6c3
18 changed files with 3521 additions and 2968 deletions
--- a/src/llama-grammar.h
+++ b/src/llama-grammar.h
@ -0,0 +1,41 @@
+#pragma once
+
+#include "llama-impl.h"
+
+struct llama_vocab;
+struct llama_sampling;
+
+struct llama_grammar {
+    const llama_grammar_rules  rules;
+          llama_grammar_stacks stacks;
+
+    // buffer for partially generated UTF-8 sequence from accepted tokens
+    llama_partial_utf8 partial_utf8;
+};
+
+struct llama_grammar * llama_get_grammar(struct llama_context * ctx);
+
+//
+// internal API
+//
+
+struct llama_grammar * llama_grammar_init_impl(
+            const llama_grammar_element ** rules,
+                                 size_t    n_rules,
+                                 size_t    start_rule_index);
+
+void llama_grammar_free_impl(struct llama_grammar * grammar);
+
+struct llama_grammar * llama_grammar_copy_impl(const struct llama_grammar * grammar);
+
+void llama_grammar_sample_impl(
+        const struct llama_grammar * grammar,
+          const struct llama_vocab * vocab,
+       const struct llama_sampling * smpl,
+            llama_token_data_array * candidates);
+
+void llama_grammar_accept_token_impl(
+              struct llama_grammar * grammar,
+          const struct llama_vocab * vocab,
+       const struct llama_sampling * smpl,
+                       llama_token   token);