llama : move vocab, grammar and sampling into separate files (#8508)

* llama : move sampling code into llama-sampling

ggml-ci

* llama : move grammar code into llama-grammar

ggml-ci

* cont

ggml-ci

* cont : pre-fetch rules

* cont

ggml-ci

* llama : deprecate llama_sample_grammar

* llama : move tokenizers into llama-vocab

ggml-ci

* make : update llama.cpp deps [no ci]

* llama : redirect external API to internal APIs

ggml-ci

* llama : suffix the internal APIs with "_impl"

ggml-ci

* llama : clean-up

Author:    Georgi Gerganov
Committer: GitHub
Date:      2024-07-23 13:10:17 +03:00
Commit:    938943cdbf
Parent:    751fcfc6c3
18 changed files with 3521 additions and 2968 deletions
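
Note on the "redirect external API to internal APIs" / "_impl" steps above: the usual shape of that change is a thin public wrapper in llama.cpp that forwards to an internal function now living in the split-out file. The sketch below is illustrative only; the internal name llama_grammar_init_impl and the exact file layout are assumptions, not necessarily what this commit contains.

// Hypothetical sketch of the public -> "_impl" forwarding pattern
// (assumes the declarations from llama.h are visible).

// llama-grammar.h (internal header): implementation moved out of llama.cpp
struct llama_grammar * llama_grammar_init_impl(
        const llama_grammar_element ** rules,
        size_t n_rules,
        size_t start_rule_index);

// llama.cpp (public API kept stable): forwards to the internal function
struct llama_grammar * llama_grammar_init(
        const llama_grammar_element ** rules,
        size_t n_rules,
        size_t start_rule_index) {
    return llama_grammar_init_impl(rules, n_rules, start_rule_index);
}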


@@ -330,7 +330,7 @@ static llama_token llama_sampling_sample_impl(
         llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
 
         // Apply grammar constraints to the single token
-        llama_sample_grammar(ctx_main, &single_token_data_array, ctx_sampling->grammar);
+        llama_grammar_sample(ctx_sampling->grammar, ctx_main, &single_token_data_array);
 
         // Check if the token is valid according to the grammar by seeing if its logit has been set to -INFINITY
         bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
@@ -421,7 +421,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
 
     // apply grammar checks before sampling logic
     if (apply_grammar && ctx_sampling->grammar != NULL) {
-        llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar);
+        llama_grammar_sample(ctx_sampling->grammar, ctx_main, &cur_p);
     }
 
     return cur_p;
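
The two hunks above show the call-site change for the deprecated llama_sample_grammar: the grammar moves to the first argument and the context to the second. A minimal before/after, using the variable names from the hunks above:

// before (deprecated): context first, grammar last
llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar);

// after: grammar first, then the context, then the candidate array
llama_grammar_sample(ctx_sampling->grammar, ctx_main, &cur_p);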
@@ -455,6 +455,6 @@ void llama_sampling_accept(
     ctx_sampling->prev.push_back(id);
 
     if (ctx_sampling->grammar != NULL && apply_grammar) {
-        llama_grammar_accept_token(ctx_main, ctx_sampling->grammar, id);
+        llama_grammar_accept_token(ctx_sampling->grammar, ctx_main, id);
     }
 }
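
llama_grammar_accept_token gets the same reordering (grammar first, context second). Below is a minimal sketch of one grammar-constrained sampling step with the new argument order; the helper name and the use of llama_sample_token are for illustration only and are not part of this diff.

// Minimal sketch (not from the diff): one grammar-constrained sampling step.
static llama_token sample_one(
        struct llama_grammar   * grammar,
        struct llama_context   * ctx,
        llama_token_data_array * candidates) {
    if (grammar != NULL) {
        llama_grammar_sample(grammar, ctx, candidates);  // mask tokens the grammar rejects
    }

    const llama_token id = llama_sample_token(ctx, candidates);

    if (grammar != NULL) {
        llama_grammar_accept_token(grammar, ctx, id);    // advance the grammar state with the picked token
    }

    return id;
}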