llama : move vocab, grammar and sampling into separate files (#8508)

* llama : move sampling code into llama-sampling ggml-ci * llama : move grammar code into llama-grammar ggml-ci * cont ggml-ci * cont : pre-fetch rules * cont ggml-ci * llama : deprecate llama_sample_grammar * llama : move tokenizers into llama-vocab ggml-ci * make : update llama.cpp deps [no ci] * llama : redirect external API to internal APIs ggml-ci * llama : suffix the internal APIs with "_impl" ggml-ci * llama : clean-up
2024-07-23 13:10:17 +03:00 · 2024-07-23 13:10:17 +03:00 · 938943cdbf
commit 938943cdbf
parent 751fcfc6c3
18 changed files with 3521 additions and 2968 deletions
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@ -44,21 +44,26 @@ static bool test_build_grammar_fails(const std::string & grammar_str) {
    return grammar_fails;
 }

-static bool match_string(const std::string & input, llama_grammar* grammar) {
+static bool match_string(const std::string & input, llama_grammar * grammar) {
    auto decoded = decode_utf8(input, {});

    const auto & code_points = decoded.first;

+    const llama_grammar_rules  & rules      = llama_grammar_get_rules (grammar);
+          llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
+
    for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
-        auto prev_stacks = grammar->stacks;
-        llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
-        if (grammar->stacks.empty()) {
+        const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
+
+        llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
+
+        if (cur_stacks.empty()) {
            // no stacks means that the grammar failed to match at this point
            return false;
        }
    }

-    for (const auto & stack : grammar->stacks) {
+    for (const auto & stack : cur_stacks) {
        if (stack.empty()) {
            // An empty stack means that the grammar has been completed
            return true;
@ -75,7 +80,9 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
    auto grammar = build_grammar(grammar_str);

    // Save the original grammar stacks so that we can reset after every new string we want to test
-    auto original_stacks = grammar->stacks;
+    const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
+
+    llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);

    fprintf(stderr, "  🔵 Valid strings:\n");

@ -112,7 +119,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
        assert(matched);

        // Reset the grammar stacks
-        grammar->stacks = original_stacks;
+        cur_stacks = original_stacks;
    }

    fprintf(stderr, "  🟠 Invalid strings:\n");
@ -132,7 +139,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
        assert(!matched);

        // Reset the grammar stacks
-        grammar->stacks = original_stacks;
+        cur_stacks = original_stacks;
    }

    // Clean up allocated memory