llama : move vocab, grammar and sampling into separate files (#8508)

* llama : move sampling code into llama-sampling

ggml-ci

* llama : move grammar code into llama-grammar

ggml-ci

* cont

ggml-ci

* cont : pre-fetch rules

* cont

ggml-ci

* llama : deprecate llama_sample_grammar

* llama : move tokenizers into llama-vocab

ggml-ci

* make : update llama.cpp deps [no ci]

* llama : redirect external API to internal APIs

ggml-ci

* llama : suffix the internal APIs with "_impl"

ggml-ci

* llama : clean-up
This commit is contained in:
Georgi Gerganov 2024-07-23 13:10:17 +03:00 committed by GitHub
parent 751fcfc6c3
commit 938943cdbf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 3521 additions and 2968 deletions

View file

@ -44,21 +44,26 @@ static bool test_build_grammar_fails(const std::string & grammar_str) {
return grammar_fails;
}
static bool match_string(const std::string & input, llama_grammar* grammar) {
static bool match_string(const std::string & input, llama_grammar * grammar) {
auto decoded = decode_utf8(input, {});
const auto & code_points = decoded.first;
const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
auto prev_stacks = grammar->stacks;
llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
if (grammar->stacks.empty()) {
const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
if (cur_stacks.empty()) {
// no stacks means that the grammar failed to match at this point
return false;
}
}
for (const auto & stack : grammar->stacks) {
for (const auto & stack : cur_stacks) {
if (stack.empty()) {
// An empty stack means that the grammar has been completed
return true;
@ -75,7 +80,9 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
auto grammar = build_grammar(grammar_str);
// Save the original grammar stacks so that we can reset after every new string we want to test
auto original_stacks = grammar->stacks;
const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
fprintf(stderr, " 🔵 Valid strings:\n");
@ -112,7 +119,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
assert(matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
cur_stacks = original_stacks;
}
fprintf(stderr, " 🟠 Invalid strings:\n");
@ -132,7 +139,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
assert(!matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
cur_stacks = original_stacks;
}
// Clean up allocated memory