llama : refactor sampling v2 (#9294)
- Add `struct llama_sampler` and `struct llama_sampler_i` - Add `llama_sampler_` API - Add `llama_sampler_chain_` API for chaining multiple samplers - Remove `LLAMA_API_INTERNAL` - Add `llama_perf_` API and remove old `llama_print_timings` and `llama_reset_timings`
This commit is contained in:
parent
947538acb8
commit
df270ef745
48 changed files with 3497 additions and 2914 deletions
|
@ -2,33 +2,18 @@
|
|||
#undef NDEBUG
|
||||
#endif
|
||||
|
||||
#define LLAMA_API_INTERNAL
|
||||
|
||||
#include "ggml.h"
|
||||
#include "llama.h"
|
||||
#include "grammar-parser.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
#include "unicode.h"
|
||||
#include "llama-grammar.h"
|
||||
#include "json-schema-to-grammar.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
static llama_grammar* build_grammar(const std::string & grammar_str) {
|
||||
auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
|
||||
|
||||
// Ensure we parsed correctly
|
||||
assert(!parsed_grammar.rules.empty());
|
||||
|
||||
// Ensure we have a root node
|
||||
assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
|
||||
|
||||
std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
|
||||
llama_grammar* grammar = llama_grammar_init(
|
||||
grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
|
||||
|
||||
return grammar;
|
||||
static llama_grammar * build_grammar(const std::string & grammar_str) {
|
||||
return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
|
||||
}
|
||||
|
||||
static bool test_build_grammar_fails(const std::string & grammar_str) {
|
||||
|
@ -45,25 +30,23 @@ static bool test_build_grammar_fails(const std::string & grammar_str) {
|
|||
}
|
||||
|
||||
static bool match_string(const std::string & input, llama_grammar * grammar) {
|
||||
auto decoded = decode_utf8(input, {});
|
||||
|
||||
const auto & code_points = decoded.first;
|
||||
const auto cpts = unicode_cpts_from_utf8(input);
|
||||
|
||||
const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
|
||||
llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
|
||||
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
|
||||
|
||||
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
|
||||
const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
|
||||
for (const auto & cpt : cpts) {
|
||||
const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
|
||||
|
||||
llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
|
||||
llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
|
||||
|
||||
if (cur_stacks.empty()) {
|
||||
if (stacks_cur.empty()) {
|
||||
// no stacks means that the grammar failed to match at this point
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & stack : cur_stacks) {
|
||||
for (const auto & stack : stacks_cur) {
|
||||
if (stack.empty()) {
|
||||
// An empty stack means that the grammar has been completed
|
||||
return true;
|
||||
|
@ -77,12 +60,12 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
|
|||
fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
|
||||
fflush(stderr);
|
||||
|
||||
auto grammar = build_grammar(grammar_str);
|
||||
auto * grammar = build_grammar(grammar_str);
|
||||
|
||||
// Save the original grammar stacks so that we can reset after every new string we want to test
|
||||
const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
|
||||
const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar);
|
||||
|
||||
llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
|
||||
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
|
||||
|
||||
fprintf(stderr, " 🔵 Valid strings:\n");
|
||||
|
||||
|
@ -119,7 +102,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
|
|||
assert(matched);
|
||||
|
||||
// Reset the grammar stacks
|
||||
cur_stacks = original_stacks;
|
||||
stacks_cur = stacks_org;
|
||||
}
|
||||
|
||||
fprintf(stderr, " 🟠 Invalid strings:\n");
|
||||
|
@ -139,11 +122,11 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
|
|||
assert(!matched);
|
||||
|
||||
// Reset the grammar stacks
|
||||
cur_stacks = original_stacks;
|
||||
stacks_cur = stacks_org;
|
||||
}
|
||||
|
||||
// Clean up allocated memory
|
||||
llama_grammar_free(grammar);
|
||||
llama_grammar_free_impl(grammar);
|
||||
}
|
||||
static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
|
||||
test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
|
||||
|
@ -683,7 +666,8 @@ static void test_failure_missing_root() {
|
|||
term ::= number
|
||||
number ::= [0-9]+)""";
|
||||
|
||||
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
|
||||
llama_grammar_parser parsed_grammar;
|
||||
parsed_grammar.parse(grammar_str.c_str());
|
||||
|
||||
// Ensure we parsed correctly
|
||||
assert(!parsed_grammar.rules.empty());
|
||||
|
@ -705,7 +689,8 @@ static void test_failure_missing_reference() {
|
|||
|
||||
fprintf(stderr, " Expected error: ");
|
||||
|
||||
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
|
||||
llama_grammar_parser parsed_grammar;
|
||||
parsed_grammar.parse(grammar_str.c_str());
|
||||
|
||||
// Ensure we did NOT parsed correctly
|
||||
assert(parsed_grammar.rules.empty());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue