llama : refactor sampling v2 (#9294)

- Add `struct llama_sampler` and `struct llama_sampler_i`
- Add `llama_sampler_` API
- Add `llama_sampler_chain_` API for chaining multiple samplers
- Remove `LLAMA_API_INTERNAL`
- Add `llama_perf_` API and remove old `llama_print_timings` and `llama_reset_timings`
This commit is contained in:
Georgi Gerganov 2024-09-07 15:16:19 +03:00 committed by GitHub
parent 947538acb8
commit df270ef745
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 3497 additions and 2914 deletions

View file

@ -3,7 +3,7 @@
#endif
#include "llama.h"
#include "grammar-parser.h"
#include "llama-grammar.h"
#include <cassert>
@ -22,7 +22,8 @@ static const char * type_str(llama_gretype type) {
static void verify_parsing(const char *grammar_bytes, const std::vector<std::pair<std::string, uint32_t>> expected, const std::vector<llama_grammar_element> &expected_rules) {
uint32_t index = 0;
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_bytes);
llama_grammar_parser parsed_grammar;
parsed_grammar.parse(grammar_bytes);
std::map<uint32_t, std::string> symbol_names;
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) {
@ -129,9 +130,10 @@ static void verify_parsing(const char *grammar_bytes, const std::vector<std::pai
}
}
static void verify_failure(const char *grammar_bytes) {
static void verify_failure(const char * grammar_bytes) {
fprintf(stderr, "Testing expected failure:%s\n", grammar_bytes);
auto result = grammar_parser::parse(grammar_bytes);
llama_grammar_parser result;
result.parse(grammar_bytes);
assert(result.rules.empty() && "should have failed");
}